LLVM  15.0.1
PPCISelLowering.cpp
Go to the documentation of this file.
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
125  "ppc-quadword-atomics",
126  cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127  cl::Hidden);
128 
129 static cl::opt<bool>
130  DisablePerfectShuffle("ppc-disable-perfect-shuffle",
131  cl::desc("disable vector permute decomposition"),
132  cl::init(true), cl::Hidden);
133 
135  "disable-auto-paired-vec-st",
136  cl::desc("disable automatically generated 32byte paired vector stores"),
137  cl::init(true), cl::Hidden);
138 
139 STATISTIC(NumTailCalls, "Number of tail calls");
140 STATISTIC(NumSiblingCalls, "Number of sibling calls");
141 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
142 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
143 
144 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
145 
146 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
147 
148 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
149 
150 // FIXME: Remove this once the bug has been fixed!
152 
154  const PPCSubtarget &STI)
155  : TargetLowering(TM), Subtarget(STI) {
156  // Initialize map that relates the PPC addressing modes to the computed flags
157  // of a load/store instruction. The map is used to determine the optimal
158  // addressing mode when selecting load and stores.
159  initializeAddrModeMap();
160  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
161  // arguments are at least 4/8 bytes aligned.
162  bool isPPC64 = Subtarget.isPPC64();
163  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
164 
165  // Set up the register classes.
166  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
167  if (!useSoftFloat()) {
168  if (hasSPE()) {
169  addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
170  // EFPU2 APU only supports f32
171  if (!Subtarget.hasEFPU2())
172  addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
173  } else {
174  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
175  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
176  }
177  }
178 
179  // Match BITREVERSE to customized fast code sequence in the td file.
182 
183  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
185 
186  // Custom lower inline assembly to check for special registers.
189 
190  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
191  for (MVT VT : MVT::integer_valuetypes()) {
194  }
195 
196  if (Subtarget.isISA3_0()) {
201  } else {
202  // No extending loads from f16 or HW conversions back and forth.
211  }
212 
214 
215  // PowerPC has pre-inc load and store's.
226  if (!Subtarget.hasSPE()) {
231  }
232 
233  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
234  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
235  for (MVT VT : ScalarIntVTs) {
240  }
241 
242  if (Subtarget.useCRBits()) {
244 
245  if (isPPC64 || Subtarget.hasFPCVT()) {
248  isPPC64 ? MVT::i64 : MVT::i32);
251  isPPC64 ? MVT::i64 : MVT::i32);
252 
255  isPPC64 ? MVT::i64 : MVT::i32);
258  isPPC64 ? MVT::i64 : MVT::i32);
259 
262  isPPC64 ? MVT::i64 : MVT::i32);
265  isPPC64 ? MVT::i64 : MVT::i32);
266 
269  isPPC64 ? MVT::i64 : MVT::i32);
272  isPPC64 ? MVT::i64 : MVT::i32);
273  } else {
278  }
279 
280  // PowerPC does not support direct load/store of condition registers.
283 
284  // FIXME: Remove this once the ANDI glue bug is fixed:
285  if (ANDIGlueBug)
287 
288  for (MVT VT : MVT::integer_valuetypes()) {
292  }
293 
294  addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
295  }
296 
297  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
298  // PPC (the libcall is not available).
303 
304  // We do not currently implement these libm ops for PowerPC.
311 
312  // PowerPC has no SREM/UREM instructions unless we are on P9
313  // On P9 we may use a hardware instruction to compute the remainder.
314  // When the result of both the remainder and the division is required it is
315  // more efficient to compute the remainder from the result of the division
316  // rather than use the remainder instruction. The instructions are legalized
317  // directly because the DivRemPairsPass performs the transformation at the IR
318  // level.
319  if (Subtarget.isISA3_0()) {
324  } else {
329  }
330 
331  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
340 
341  // Handle constrained floating-point operations of scalar.
342  // TODO: Handle SPE specific operation.
348 
353 
354  if (!Subtarget.hasSPE()) {
357  }
358 
359  if (Subtarget.hasVSX()) {
362  }
363 
364  if (Subtarget.hasFSQRT()) {
367  }
368 
369  if (Subtarget.hasFPRND()) {
374 
379  }
380 
381  // We don't support sin/cos/sqrt/fmod/pow
392 
393  // MASS transformation for LLVM intrinsics with replicating fast-math flag
394  // to be consistent to PPCGenScalarMASSEntries pass
395  if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
408  }
409 
410  if (Subtarget.hasSPE()) {
413  } else {
416  }
417 
418  if (Subtarget.hasSPE())
420 
422 
423  // If we're enabling GP optimizations, use hardware square root
424  if (!Subtarget.hasFSQRT() &&
425  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
426  Subtarget.hasFRE()))
428 
429  if (!Subtarget.hasFSQRT() &&
430  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
431  Subtarget.hasFRES()))
433 
434  if (Subtarget.hasFCPSGN()) {
437  } else {
440  }
441 
442  if (Subtarget.hasFPRND()) {
447 
452  }
453 
454  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
455  // to speed up scalar BSWAP64.
456  // CTPOP or CTTZ were introduced in P8/P9 respectively
458  if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
460  else
462  if (Subtarget.isISA3_0()) {
465  } else {
468  }
469 
470  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
473  } else {
476  }
477 
478  // PowerPC does not have ROTR
481 
482  if (!Subtarget.useCRBits()) {
483  // PowerPC does not have Select
488  }
489 
490  // PowerPC wants to turn select_cc of FP into fsel when possible.
493 
494  // PowerPC wants to optimize integer setcc a bit
495  if (!Subtarget.useCRBits())
497 
498  if (Subtarget.hasFPU()) {
502 
506  }
507 
508  // PowerPC does not have BRCOND which requires SetCC
509  if (!Subtarget.useCRBits())
511 
513 
514  if (Subtarget.hasSPE()) {
515  // SPE has built-in conversions
522 
523  // SPE supports signaling compare of f32/f64.
526  } else {
527  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
530 
531  // PowerPC does not have [U|S]INT_TO_FP
536  }
537 
538  if (Subtarget.hasDirectMove() && isPPC64) {
543  if (TM.Options.UnsafeFPMath) {
552  }
553  } else {
558  }
559 
560  // We cannot sextinreg(i1). Expand to shifts.
562 
563  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
564  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
565  // support continuation, user-level threading, and etc.. As a result, no
566  // other SjLj exception interfaces are implemented and please don't build
567  // your own exception handling based on them.
568  // LLVM/Clang supports zero-cost DWARF exception handling.
571 
572  // We want to legalize GlobalAddress and ConstantPool nodes into the
573  // appropriate instructions to materialize the address.
584 
585  // TRAP is legal.
587 
588  // TRAMPOLINE is custom lowered.
591 
592  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
594 
595  if (Subtarget.is64BitELFABI()) {
596  // VAARG always uses double-word chunks, so promote anything smaller.
606  } else if (Subtarget.is32BitELFABI()) {
607  // VAARG is custom lowered with the 32-bit SVR4 ABI.
610  } else
612 
613  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
614  if (Subtarget.is32BitELFABI())
616  else
618 
619  // Use the default implementation.
629 
630  // We want to custom lower some of our intrinsics.
636 
637  // To handle counter-based loop conditions.
639 
644 
645  // Comparisons that require checking two conditions.
646  if (Subtarget.hasSPE()) {
651  }
664 
667 
668  if (Subtarget.has64BitSupport()) {
669  // They also have instructions for converting between i64 and fp.
678  // This is just the low 32 bits of a (signed) fp->i64 conversion.
679  // We cannot do this with Promote because i64 is not a legal type.
682 
683  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
686  }
687  } else {
688  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
689  if (Subtarget.hasSPE()) {
692  } else {
695  }
696  }
697 
698  // With the instructions enabled under FPCVT, we can do everything.
699  if (Subtarget.hasFPCVT()) {
700  if (Subtarget.has64BitSupport()) {
709  }
710 
719  }
720 
721  if (Subtarget.use64BitRegs()) {
722  // 64-bit PowerPC implementations can support i64 types directly
723  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
724  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
726  // 64-bit PowerPC wants to expand i128 shifts itself.
730  } else {
731  // 32-bit PowerPC wants to expand i64 shifts itself.
735  }
736 
737  // PowerPC has better expansions for funnel shifts than the generic
738  // TargetLowering::expandFunnelShift.
739  if (Subtarget.has64BitSupport()) {
742  }
745 
746  if (Subtarget.hasVSX()) {
751  }
752 
753  if (Subtarget.hasAltivec()) {
754  for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
759  }
760  // First set operation action for all vector types to expand. Then we
761  // will selectively turn on ones that can be effectively codegen'd.
762  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
763  // add/sub are legal for all supported vector VT's.
766 
767  // For v2i64, these are only valid with P8Vector. This is corrected after
768  // the loop.
769  if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
774  }
775  else {
780  }
781 
782  if (Subtarget.hasVSX()) {
785  }
786 
787  // Vector instructions introduced in P8
788  if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
791  }
792  else {
795  }
796 
797  // Vector instructions introduced in P9
798  if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
800  else
802 
803  // We promote all shuffles to v16i8.
806 
807  // We promote all non-typed operations to v4i32.
823 
824  // No other operations are legal.
862 
863  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
864  setTruncStoreAction(VT, InnerVT, Expand);
865  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
866  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
867  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
868  }
869  }
871  if (!Subtarget.hasP8Vector()) {
876  }
877 
878  // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
879  // with merges, splats, etc.
881 
882  // Vector truncates to sub-word integer that fit in an Altivec/VSX register
883  // are cheap, so handle them before they get expanded to scalar.
889 
895  Subtarget.useCRBits() ? Legal : Expand);
909 
910  // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
912  // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
913  if (Subtarget.hasAltivec())
914  for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
916  // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
917  if (Subtarget.hasP8Altivec())
919 
920  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
921  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
922  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
923  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
924 
927 
928  if (Subtarget.hasVSX()) {
932  }
933 
934  if (Subtarget.hasP8Altivec())
936  else
938 
939  if (Subtarget.isISA3_1()) {
958  }
959 
962 
965 
970 
971  // Altivec does not contain unordered floating-point compare instructions
976 
977  if (Subtarget.hasVSX()) {
980  if (Subtarget.hasP8Vector()) {
983  }
984  if (Subtarget.hasDirectMove() && isPPC64) {
993  }
995 
996  // The nearbyint variants are not allowed to raise the inexact exception
997  // so we can only code-gen them with unsafe math.
998  if (TM.Options.UnsafeFPMath) {
1001  }
1002 
1011 
1017 
1020 
1023 
1024  // Share the Altivec comparison restrictions.
1029 
1032 
1034 
1035  if (Subtarget.hasP8Vector())
1036  addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1037 
1038  addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1039 
1040  addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1041  addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1042  addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1043 
1044  if (Subtarget.hasP8Altivec()) {
1048 
1049  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1050  // SRL, but not for SRA because of the instructions available:
1051  // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1052  // doing
1056 
1058  }
1059  else {
1063 
1065 
1066  // VSX v2i64 only supports non-arithmetic operations.
1069  }
1070 
1071  if (Subtarget.isISA3_1())
1073  else
1075 
1080 
1082 
1091 
1092  // Custom handling for partial vectors of integers converted to
1093  // floating point. We already have optimal handling for v2i32 through
1094  // the DAG combine, so those aren't necessary.
1111 
1118 
1121 
1122  // Handle constrained floating-point operations of vector.
1123  // The predictor is `hasVSX` because altivec instruction has
1124  // no exception but VSX vector instruction has.
1138 
1152 
1153  addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1154  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1155 
1156  for (MVT FPT : MVT::fp_valuetypes())
1158 
1159  // Expand the SELECT to SELECT_CC
1161 
1164 
1165  // No implementation for these ops for PowerPC.
1171  }
1172 
1173  if (Subtarget.hasP8Altivec()) {
1174  addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1175  addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1176  }
1177 
1178  if (Subtarget.hasP9Vector()) {
1181 
1182  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1183  // SRL, but not for SRA because of the instructions available:
1184  // VS{RL} and VS{RL}O.
1188 
1194 
1202 
1209 
1213 
1214  // Handle constrained floating-point operations of fp128
1235  } else if (Subtarget.hasVSX()) {
1238 
1241 
1242  // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1243  // fp_to_uint and int_to_fp.
1246 
1254 
1255  // Expand the fp_extend if the target type is fp128.
1258 
1259  // Expand the fp_round if the source type is fp128.
1260  for (MVT VT : {MVT::f32, MVT::f64}) {
1263  }
1264 
1269 
1270  // Lower following f128 select_cc pattern:
1271  // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1273 
1274  // We need to handle f128 SELECT_CC with integer result type.
1277  }
1278 
1279  if (Subtarget.hasP9Altivec()) {
1280  if (Subtarget.isISA3_1()) {
1285  } else {
1288  }
1296  }
1297 
1298  if (Subtarget.hasP10Vector()) {
1300  }
1301  }
1302 
1303  if (Subtarget.pairedVectorMemops()) {
1304  addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1307  }
1308  if (Subtarget.hasMMA()) {
1309  addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1313  }
1314 
1315  if (Subtarget.has64BitSupport())
1317 
1318  if (Subtarget.isISA3_1())
1320 
1322 
1323  if (!isPPC64) {
1326  }
1327 
1332  }
1333 
1335 
1336  if (Subtarget.hasAltivec()) {
1337  // Altivec instructions set fields to all zeros or all ones.
1339  }
1340 
1341  setLibcallName(RTLIB::MULO_I128, nullptr);
1342  if (!isPPC64) {
1343  // These libcalls are not available in 32-bit.
1344  setLibcallName(RTLIB::SHL_I128, nullptr);
1345  setLibcallName(RTLIB::SRL_I128, nullptr);
1346  setLibcallName(RTLIB::SRA_I128, nullptr);
1347  setLibcallName(RTLIB::MUL_I128, nullptr);
1348  setLibcallName(RTLIB::MULO_I64, nullptr);
1349  }
1350 
1351  if (!isPPC64)
1353  else if (shouldInlineQuadwordAtomics())
1355  else
1357 
1358  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1359 
1360  // We have target-specific dag combine patterns for the following nodes:
1363  if (Subtarget.hasFPCVT())
1366  if (Subtarget.useCRBits())
1370 
1372 
1374 
1375  if (Subtarget.useCRBits()) {
1377  }
1378 
1379  if (Subtarget.hasP9Altivec()) {
1381  }
1382 
1383  setLibcallName(RTLIB::LOG_F128, "logf128");
1384  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1385  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1386  setLibcallName(RTLIB::EXP_F128, "expf128");
1387  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1388  setLibcallName(RTLIB::SIN_F128, "sinf128");
1389  setLibcallName(RTLIB::COS_F128, "cosf128");
1390  setLibcallName(RTLIB::POW_F128, "powf128");
1391  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1392  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1393  setLibcallName(RTLIB::REM_F128, "fmodf128");
1394  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1395  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1396  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1397  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1398  setLibcallName(RTLIB::ROUND_F128, "roundf128");
1399  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1400  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1401  setLibcallName(RTLIB::RINT_F128, "rintf128");
1402  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1403  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1404  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1405  setLibcallName(RTLIB::FMA_F128, "fmaf128");
1406 
1407  // With 32 condition bits, we don't need to sink (and duplicate) compares
1408  // aggressively in CodeGenPrep.
1409  if (Subtarget.useCRBits()) {
1412  }
1413 
1415 
1416  switch (Subtarget.getCPUDirective()) {
1417  default: break;
1418  case PPC::DIR_970:
1419  case PPC::DIR_A2:
1420  case PPC::DIR_E500:
1421  case PPC::DIR_E500mc:
1422  case PPC::DIR_E5500:
1423  case PPC::DIR_PWR4:
1424  case PPC::DIR_PWR5:
1425  case PPC::DIR_PWR5X:
1426  case PPC::DIR_PWR6:
1427  case PPC::DIR_PWR6X:
1428  case PPC::DIR_PWR7:
1429  case PPC::DIR_PWR8:
1430  case PPC::DIR_PWR9:
1431  case PPC::DIR_PWR10:
1432  case PPC::DIR_PWR_FUTURE:
1435  break;
1436  }
1437 
1438  if (Subtarget.enableMachineScheduler())
1440  else
1442 
1444 
1445  // The Freescale cores do better with aggressive inlining of memcpy and
1446  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1447  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1448  Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1449  MaxStoresPerMemset = 32;
1451  MaxStoresPerMemcpy = 32;
1453  MaxStoresPerMemmove = 32;
1455  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1456  // The A2 also benefits from (very) aggressive inlining of memcpy and
1457  // friends. The overhead of a the function call, even when warm, can be
1458  // over one hundred cycles.
1459  MaxStoresPerMemset = 128;
1460  MaxStoresPerMemcpy = 128;
1461  MaxStoresPerMemmove = 128;
1462  MaxLoadsPerMemcmp = 128;
1463  } else {
1464  MaxLoadsPerMemcmp = 8;
1466  }
1467 
1468  IsStrictFPEnabled = true;
1469 
1470  // Let the subtarget (CPU) decide if a predictable select is more expensive
1471  // than the corresponding branch. This information is used in CGP to decide
1472  // when to convert selects into branches.
1474 }
1475 
1476 // *********************************** NOTE ************************************
1477 // For selecting load and store instructions, the addressing modes are defined
1478 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1479 // patterns to match the load the store instructions.
1480 //
1481 // The TD definitions for the addressing modes correspond to their respective
1482 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1483 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1484 // address mode flags of a particular node. Afterwards, the computed address
1485 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1486 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1487 // accordingly, based on the preferred addressing mode.
1488 //
1489 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1490 // MemOpFlags contains all the possible flags that can be used to compute the
1491 // optimal addressing mode for load and store instructions.
1492 // AddrMode contains all the possible load and store addressing modes available
1493 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1494 //
1495 // When adding new load and store instructions, it is possible that new address
1496 // flags may need to be added into MemOpFlags, and a new addressing mode will
1497 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1498 // of the minimal and main distinguishing address flags for the new load/store
1499 // instructions) will need to be added into initializeAddrModeMap() below.
1500 // Finally, when adding new addressing modes, the getAddrModeForFlags() will
1501 // need to be updated to account for selecting the optimal addressing mode.
1502 // *****************************************************************************
1503 /// Initialize the map that relates the different addressing modes of the load
1504 /// and store instructions to a set of flags. This ensures the load/store
1505 /// instruction is correctly matched during instruction selection.
1506 void PPCTargetLowering::initializeAddrModeMap() {
// Populate AddrModesMap: for each PPC addressing mode (D-Form, DS-Form,
// DQ-Form, prefixed D-Form) record the memory-op flag combinations that
// should select it.  The mnemonic comments below name the load/store
// instruction groups each flag set corresponds to.
// NOTE(review): the flag-constant entries of every initializer appear to
// have been elided in this view, and the AM_PrefixDForm initializer is cut
// mid-expression — confirm this listing against the full upstream source
// before relying on it.
 1507  AddrModesMap[PPC::AM_DForm] = {
 1508  // LWZ, STW
 1513  // LBZ, LHZ, STB, STH
 1518  // LHA
 1523  // LFS, LFD, STFS, STFD
 1528  };
 1529  AddrModesMap[PPC::AM_DSForm] = {
 1530  // LWA
 1534  // LD, STD
 1538  // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
 1542  };
 1543  AddrModesMap[PPC::AM_DQForm] = {
 1544  // LXV, STXV
 1548  };
 1549  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
 1551  // TODO: Add mapping for quadword load/store.
 1552 }
1553 
1554 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1555 /// the desired ByVal argument alignment.
1556 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1557  if (MaxAlign == MaxMaxAlign)
1558  return;
1559  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1560  if (MaxMaxAlign >= 32 &&
1561  VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1562  MaxAlign = Align(32);
1563  else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1564  MaxAlign < 16)
1565  MaxAlign = Align(16);
1566  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1567  Align EltAlign;
1568  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1569  if (EltAlign > MaxAlign)
1570  MaxAlign = EltAlign;
1571  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1572  for (auto *EltTy : STy->elements()) {
1573  Align EltAlign;
1574  getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1575  if (EltAlign > MaxAlign)
1576  MaxAlign = EltAlign;
1577  if (MaxAlign == MaxMaxAlign)
1578  break;
1579  }
1580  }
1581 }
1582 
1583 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1584 /// function arguments in the caller parameter area.
1586  const DataLayout &DL) const {
1587  // 16byte and wider vectors are passed on 16byte boundary.
1588  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1589  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1590  if (Subtarget.hasAltivec())
1591  getMaxByValAlign(Ty, Alignment, Align(16));
1592  return Alignment.value();
1593 }
1594 
1596  return Subtarget.useSoftFloat();
1597 }
1598 
1600  return Subtarget.hasSPE();
1601 }
1602 
1604  return VT.isScalarInteger();
1605 }
1606 
1607 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1608  switch ((PPCISD::NodeType)Opcode) {
1609  case PPCISD::FIRST_NUMBER: break;
1610  case PPCISD::FSEL: return "PPCISD::FSEL";
1611  case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1612  case PPCISD::XSMINC: return "PPCISD::XSMINC";
1613  case PPCISD::FCFID: return "PPCISD::FCFID";
1614  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1615  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1616  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1617  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1618  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1619  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1620  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1622  return "PPCISD::FP_TO_UINT_IN_VSR,";
1624  return "PPCISD::FP_TO_SINT_IN_VSR";
1625  case PPCISD::FRE: return "PPCISD::FRE";
1626  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1627  case PPCISD::FTSQRT:
1628  return "PPCISD::FTSQRT";
1629  case PPCISD::FSQRT:
1630  return "PPCISD::FSQRT";
1631  case PPCISD::STFIWX: return "PPCISD::STFIWX";
1632  case PPCISD::VPERM: return "PPCISD::VPERM";
1633  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1635  return "PPCISD::XXSPLTI_SP_TO_DP";
1636  case PPCISD::XXSPLTI32DX:
1637  return "PPCISD::XXSPLTI32DX";
1638  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1639  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1640  case PPCISD::VECSHL: return "PPCISD::VECSHL";
1641  case PPCISD::CMPB: return "PPCISD::CMPB";
1642  case PPCISD::Hi: return "PPCISD::Hi";
1643  case PPCISD::Lo: return "PPCISD::Lo";
1644  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1645  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1646  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1647  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1648  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1649  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1650  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1651  case PPCISD::SRL: return "PPCISD::SRL";
1652  case PPCISD::SRA: return "PPCISD::SRA";
1653  case PPCISD::SHL: return "PPCISD::SHL";
1654  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1655  case PPCISD::CALL: return "PPCISD::CALL";
1656  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1657  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1658  case PPCISD::CALL_RM:
1659  return "PPCISD::CALL_RM";
1660  case PPCISD::CALL_NOP_RM:
1661  return "PPCISD::CALL_NOP_RM";
1662  case PPCISD::CALL_NOTOC_RM:
1663  return "PPCISD::CALL_NOTOC_RM";
1664  case PPCISD::MTCTR: return "PPCISD::MTCTR";
1665  case PPCISD::BCTRL: return "PPCISD::BCTRL";
1666  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1667  case PPCISD::BCTRL_RM:
1668  return "PPCISD::BCTRL_RM";
1670  return "PPCISD::BCTRL_LOAD_TOC_RM";
1671  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1672  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1673  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1674  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1675  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1676  case PPCISD::MFVSR: return "PPCISD::MFVSR";
1677  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1678  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1679  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1680  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1682  return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1684  return "PPCISD::ANDI_rec_1_EQ_BIT";
1686  return "PPCISD::ANDI_rec_1_GT_BIT";
1687  case PPCISD::VCMP: return "PPCISD::VCMP";
1688  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1689  case PPCISD::LBRX: return "PPCISD::LBRX";
1690  case PPCISD::STBRX: return "PPCISD::STBRX";
1691  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1692  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1693  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1694  case PPCISD::STXSIX: return "PPCISD::STXSIX";
1695  case PPCISD::VEXTS: return "PPCISD::VEXTS";
1696  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1697  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1698  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1699  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1701  return "PPCISD::ST_VSR_SCAL_INT";
1702  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1703  case PPCISD::BDNZ: return "PPCISD::BDNZ";
1704  case PPCISD::BDZ: return "PPCISD::BDZ";
1705  case PPCISD::MFFS: return "PPCISD::MFFS";
1706  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1707  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1708  case PPCISD::CR6SET: return "PPCISD::CR6SET";
1709  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1710  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1711  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1712  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1713  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1714  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1715  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1716  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1717  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1718  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1719  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1720  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1721  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1722  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1723  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1724  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1725  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1726  case PPCISD::PADDI_DTPREL:
1727  return "PPCISD::PADDI_DTPREL";
1728  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1729  case PPCISD::SC: return "PPCISD::SC";
1730  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1731  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1732  case PPCISD::RFEBB: return "PPCISD::RFEBB";
1733  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1734  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1735  case PPCISD::VABSD: return "PPCISD::VABSD";
1736  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1737  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1738  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1739  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1740  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1741  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1742  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1744  return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1746  return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1747  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1748  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1749  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1750  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1751  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1752  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1753  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1754  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1756  return "PPCISD::STRICT_FADDRTZ";
1757  case PPCISD::STRICT_FCTIDZ:
1758  return "PPCISD::STRICT_FCTIDZ";
1759  case PPCISD::STRICT_FCTIWZ:
1760  return "PPCISD::STRICT_FCTIWZ";
1762  return "PPCISD::STRICT_FCTIDUZ";
1764  return "PPCISD::STRICT_FCTIWUZ";
1765  case PPCISD::STRICT_FCFID:
1766  return "PPCISD::STRICT_FCFID";
1767  case PPCISD::STRICT_FCFIDU:
1768  return "PPCISD::STRICT_FCFIDU";
1769  case PPCISD::STRICT_FCFIDS:
1770  return "PPCISD::STRICT_FCFIDS";
1772  return "PPCISD::STRICT_FCFIDUS";
1773  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1774  }
1775  return nullptr;
1776 }
1777 
1779  EVT VT) const {
1780  if (!VT.isVector())
1781  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1782 
1784 }
1785 
1787  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1788  return true;
1789 }
1790 
1791 //===----------------------------------------------------------------------===//
1792 // Node matching predicates, for use by the tblgen matching code.
1793 //===----------------------------------------------------------------------===//
1794 
1795 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1797  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1798  return CFP->getValueAPF().isZero();
1799  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1800  // Maybe this has already been legalized into the constant pool?
1801  if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1802  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1803  return CFP->getValueAPF().isZero();
1804  }
1805  return false;
1806 }
1807 
1808 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1809 /// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // A negative mask element denotes undef, which matches anything.
  return Op == Val || Op < 0;
}
1813 
1814 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1815 /// VPKUHUM instruction.
1816 /// The ShuffleKind distinguishes between big-endian operations with
1817 /// two different inputs (0), either-endian operations with two identical
1818 /// inputs (1), and little-endian operations with two different inputs (2).
1819 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1821  SelectionDAG &DAG) {
1822  bool IsLE = DAG.getDataLayout().isLittleEndian();
1823  if (ShuffleKind == 0) {
1824  if (IsLE)
1825  return false;
1826  for (unsigned i = 0; i != 16; ++i)
1827  if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1828  return false;
1829  } else if (ShuffleKind == 2) {
1830  if (!IsLE)
1831  return false;
1832  for (unsigned i = 0; i != 16; ++i)
1833  if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1834  return false;
1835  } else if (ShuffleKind == 1) {
1836  unsigned j = IsLE ? 0 : 1;
1837  for (unsigned i = 0; i != 8; ++i)
1838  if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1839  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1840  return false;
1841  }
1842  return true;
1843 }
1844 
1845 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1846 /// VPKUWUM instruction.
1847 /// The ShuffleKind distinguishes between big-endian operations with
1848 /// two different inputs (0), either-endian operations with two identical
1849 /// inputs (1), and little-endian operations with two different inputs (2).
1850 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1852  SelectionDAG &DAG) {
1853  bool IsLE = DAG.getDataLayout().isLittleEndian();
1854  if (ShuffleKind == 0) {
1855  if (IsLE)
1856  return false;
1857  for (unsigned i = 0; i != 16; i += 2)
1858  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1859  !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1860  return false;
1861  } else if (ShuffleKind == 2) {
1862  if (!IsLE)
1863  return false;
1864  for (unsigned i = 0; i != 16; i += 2)
1865  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1866  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1867  return false;
1868  } else if (ShuffleKind == 1) {
1869  unsigned j = IsLE ? 0 : 2;
1870  for (unsigned i = 0; i != 8; i += 2)
1871  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1872  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1873  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1874  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1875  return false;
1876  }
1877  return true;
1878 }
1879 
1880 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1881 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1882 /// current subtarget.
1883 ///
1884 /// The ShuffleKind distinguishes between big-endian operations with
1885 /// two different inputs (0), either-endian operations with two identical
1886 /// inputs (1), and little-endian operations with two different inputs (2).
1887 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1889  SelectionDAG &DAG) {
1890  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1891  if (!Subtarget.hasP8Vector())
1892  return false;
1893 
1894  bool IsLE = DAG.getDataLayout().isLittleEndian();
1895  if (ShuffleKind == 0) {
1896  if (IsLE)
1897  return false;
1898  for (unsigned i = 0; i != 16; i += 4)
1899  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1900  !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1901  !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1902  !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1903  return false;
1904  } else if (ShuffleKind == 2) {
1905  if (!IsLE)
1906  return false;
1907  for (unsigned i = 0; i != 16; i += 4)
1908  if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1909  !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1910  !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1911  !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1912  return false;
1913  } else if (ShuffleKind == 1) {
1914  unsigned j = IsLE ? 0 : 4;
1915  for (unsigned i = 0; i != 8; i += 4)
1916  if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1917  !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1918  !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1919  !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1920  !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1921  !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1922  !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1923  !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1924  return false;
1925  }
1926  return true;
1927 }
1928 
1929 /// isVMerge - Common function, used to match vmrg* shuffles.
1930 ///
1931 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1932  unsigned LHSStart, unsigned RHSStart) {
1933  if (N->getValueType(0) != MVT::v16i8)
1934  return false;
1935  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1936  "Unsupported merge size!");
1937 
1938  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1939  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1940  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1941  LHSStart+j+i*UnitSize) ||
1942  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1943  RHSStart+j+i*UnitSize))
1944  return false;
1945  }
1946  return true;
1947 }
1948 
1949 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1950 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1951 /// The ShuffleKind distinguishes between big-endian merges with two
1952 /// different inputs (0), either-endian merges with two identical inputs (1),
1953 /// and little-endian merges with two different inputs (2). For the latter,
1954 /// the input operands are swapped (see PPCInstrAltivec.td).
1956  unsigned ShuffleKind, SelectionDAG &DAG) {
1957  if (DAG.getDataLayout().isLittleEndian()) {
1958  if (ShuffleKind == 1) // unary
1959  return isVMerge(N, UnitSize, 0, 0);
1960  else if (ShuffleKind == 2) // swapped
1961  return isVMerge(N, UnitSize, 0, 16);
1962  else
1963  return false;
1964  } else {
1965  if (ShuffleKind == 1) // unary
1966  return isVMerge(N, UnitSize, 8, 8);
1967  else if (ShuffleKind == 0) // normal
1968  return isVMerge(N, UnitSize, 8, 24);
1969  else
1970  return false;
1971  }
1972 }
1973 
1974 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1975 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1976 /// The ShuffleKind distinguishes between big-endian merges with two
1977 /// different inputs (0), either-endian merges with two identical inputs (1),
1978 /// and little-endian merges with two different inputs (2). For the latter,
1979 /// the input operands are swapped (see PPCInstrAltivec.td).
1981  unsigned ShuffleKind, SelectionDAG &DAG) {
1982  if (DAG.getDataLayout().isLittleEndian()) {
1983  if (ShuffleKind == 1) // unary
1984  return isVMerge(N, UnitSize, 8, 8);
1985  else if (ShuffleKind == 2) // swapped
1986  return isVMerge(N, UnitSize, 8, 24);
1987  else
1988  return false;
1989  } else {
1990  if (ShuffleKind == 1) // unary
1991  return isVMerge(N, UnitSize, 0, 0);
1992  else if (ShuffleKind == 0) // normal
1993  return isVMerge(N, UnitSize, 0, 16);
1994  else
1995  return false;
1996  }
1997 }
1998 
1999 /**
2000  * Common function used to match vmrgew and vmrgow shuffles
2001  *
2002  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
2004  * machine.
2005  * - Little Endian:
2006  * - Use offset of 0 to check for odd elements
2007  * - Use offset of 4 to check for even elements
2008  * - Big Endian:
2009  * - Use offset of 0 to check for even elements
2010  * - Use offset of 4 to check for odd elements
2011  * A detailed description of the vector element ordering for little endian and
2012  * big endian can be found at
2013  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2014  * Targeting your applications - what little endian and big endian IBM XL C/C++
2015  * compiler differences mean to you
2016  *
2017  * The mask to the shuffle vector instruction specifies the indices of the
2018  * elements from the two input vectors to place in the result. The elements are
2019  * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements, each
 * 8 bits in size. More info on the shuffle vector can be found in the
2022  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2023  * Language Reference.
2024  *
2025  * The RHSStartValue indicates whether the same input vectors are used (unary)
2026  * or two different input vectors are used, based on the following:
2027  * - If the instruction uses the same vector for both inputs, the range of the
2028  * indices will be 0 to 15. In this case, the RHSStart value passed should
2029  * be 0.
2030  * - If the instruction has two different vectors then the range of the
2031  * indices will be 0 to 31. In this case, the RHSStart value passed should
2032  * be 16 (indices 0-15 specify elements in the first vector while indices 16
2033  * to 31 specify elements in the second vector).
2034  *
2035  * \param[in] N The shuffle vector SD Node to analyze
2036  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2037  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2038  * vector to the shuffle_vector instruction
2039  * \return true iff this shuffle vector represents an even or odd word merge
2040  */
2041 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2042  unsigned RHSStartValue) {
2043  if (N->getValueType(0) != MVT::v16i8)
2044  return false;
2045 
2046  for (unsigned i = 0; i < 2; ++i)
2047  for (unsigned j = 0; j < 4; ++j)
2048  if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2049  i*RHSStartValue+j+IndexOffset) ||
2050  !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2051  i*RHSStartValue+j+IndexOffset+8))
2052  return false;
2053  return true;
2054 }
2055 
2056 /**
2057  * Determine if the specified shuffle mask is suitable for the vmrgew or
2058  * vmrgow instructions.
2059  *
2060  * \param[in] N The shuffle vector SD Node to analyze
2061  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2062  * \param[in] ShuffleKind Identify the type of merge:
2063  * - 0 = big-endian merge with two different inputs;
2064  * - 1 = either-endian merge with two identical inputs;
2065  * - 2 = little-endian merge with two different inputs (inputs are swapped for
2066  * little-endian merges).
2067  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for the requested merge
2069  */
2071  unsigned ShuffleKind, SelectionDAG &DAG) {
2072  if (DAG.getDataLayout().isLittleEndian()) {
2073  unsigned indexOffset = CheckEven ? 4 : 0;
2074  if (ShuffleKind == 1) // Unary
2075  return isVMerge(N, indexOffset, 0);
2076  else if (ShuffleKind == 2) // swapped
2077  return isVMerge(N, indexOffset, 16);
2078  else
2079  return false;
2080  }
2081  else {
2082  unsigned indexOffset = CheckEven ? 0 : 4;
2083  if (ShuffleKind == 1) // Unary
2084  return isVMerge(N, indexOffset, 0);
2085  else if (ShuffleKind == 0) // Normal
2086  return isVMerge(N, indexOffset, 16);
2087  else
2088  return false;
2089  }
2090  return false;
2091 }
2092 
2093 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2094 /// amount, otherwise return -1.
2095 /// The ShuffleKind distinguishes between big-endian operations with two
2096 /// different inputs (0), either-endian operations with two identical inputs
2097 /// (1), and little-endian operations with two different inputs (2). For the
2098 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  // Normalize so ShiftAmt is the mask value element 0 would have had.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Two distinct inputs: check the rest of the elements to see if they are
    // consecutive (indices may run into the second input, up to 31).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Unary: check the rest of the elements to see if they are consecutive,
    // wrapping around the single 16-byte input (hence the & 15).
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // For LE the equivalent shift amount is counted from the opposite end.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
2139 
2140 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2141 /// specifies a splat of a single element that is suitable for input to
2142 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2144  EVT VT = N->getValueType(0);
2145  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2146  return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2147 
2148  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2149  EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2150 
2151  // The consecutive indices need to specify an element, not part of two
2152  // different elements. So abandon ship early if this isn't the case.
2153  if (N->getMaskElt(0) % EltSize != 0)
2154  return false;
2155 
2156  // This is a splat operation if each element of the permute is the same, and
2157  // if the value doesn't reference the second vector.
2158  unsigned ElementBase = N->getMaskElt(0);
2159 
2160  // FIXME: Handle UNDEF elements too!
2161  if (ElementBase >= 16)
2162  return false;
2163 
2164  // Check that the indices are consecutive, in the case of a multi-byte element
2165  // splatted with a v16i8 mask.
2166  for (unsigned i = 1; i != EltSize; ++i)
2167  if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2168  return false;
2169 
2170  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2171  if (N->getMaskElt(i) < 0) continue;
2172  for (unsigned j = 0; j != EltSize; ++j)
2173  if (N->getMaskElt(i+j) != N->getMaskElt(j))
2174  return false;
2175  }
2176  return true;
2177 }
2178 
2179 /// Check that the mask is shuffling N byte elements. Within each N byte
2180 /// element of the mask, the indices could be either in increasing or
2181 /// decreasing order as long as they are consecutive.
2182 /// \param[in] N the shuffle vector SD Node to analyze
2183 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2184 /// Word/DoubleWord/QuadWord).
2185 /// \param[in] StepLen the delta indices number among the N byte element, if
2186 /// the mask is in increasing/decreasing order then it is 1/-1.
2187 /// \return true iff the mask is shuffling N byte elements.
2189  int StepLen) {
2190  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2191  "Unexpected element width.");
2192  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2193 
2194  unsigned NumOfElem = 16 / Width;
2195  unsigned MaskVal[16]; // Width is never greater than 16
2196  for (unsigned i = 0; i < NumOfElem; ++i) {
2197  MaskVal[0] = N->getMaskElt(i * Width);
2198  if ((StepLen == 1) && (MaskVal[0] % Width)) {
2199  return false;
2200  } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2201  return false;
2202  }
2203 
2204  for (unsigned int j = 1; j < Width; ++j) {
2205  MaskVal[j] = N->getMaskElt(i * Width + j);
2206  if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2207  return false;
2208  }
2209  }
2210  }
2211 
2212  return true;
2213 }
2214 
/// Check whether shuffle node \p N can be lowered as a word insertion
/// (XXINSERTW-style): three result words come, in order, from one input and
/// the fourth comes from the other (or the same) input.  On success sets
/// \p ShiftElts (from an endian-specific table keyed by the out-of-sequence
/// word), \p InsertAtByte (byte offset of the inserted word in the result)
/// and \p Swap (whether the inserted word comes from the first operand).
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Each result word must be built from four consecutive mask bytes.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2289 
2291  bool &Swap, bool IsLE) {
2292  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2293  // Ensure each byte index of the word is consecutive.
2294  if (!isNByteElemShuffleMask(N, 4, 1))
2295  return false;
2296 
2297  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2298  unsigned M0 = N->getMaskElt(0) / 4;
2299  unsigned M1 = N->getMaskElt(4) / 4;
2300  unsigned M2 = N->getMaskElt(8) / 4;
2301  unsigned M3 = N->getMaskElt(12) / 4;
2302 
2303  // If both vector operands for the shuffle are the same vector, the mask will
2304  // contain only elements from the first one and the second one will be undef.
2305  if (N->getOperand(1).isUndef()) {
2306  assert(M0 < 4 && "Indexing into an undef vector?");
2307  if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2308  return false;
2309 
2310  ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2311  Swap = false;
2312  return true;
2313  }
2314 
2315  // Ensure each word index of the ShuffleVector Mask is consecutive.
2316  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2317  return false;
2318 
2319  if (IsLE) {
2320  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2321  // Input vectors don't need to be swapped if the leading element
2322  // of the result is one of the 3 left elements of the second vector
2323  // (or if there is no shift to be done at all).
2324  Swap = false;
2325  ShiftElts = (8 - M0) % 8;
2326  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2327  // Input vectors need to be swapped if the leading element
2328  // of the result is one of the 3 left elements of the first vector
2329  // (or if we're shifting by 4 - thereby simply swapping the vectors).
2330  Swap = true;
2331  ShiftElts = (4 - M0) % 4;
2332  }
2333 
2334  return true;
2335  } else { // BE
2336  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2337  // Input vectors don't need to be swapped if the leading element
2338  // of the result is one of the 4 elements of the first vector.
2339  Swap = false;
2340  ShiftElts = M0;
2341  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2342  // Input vectors need to be swapped if the leading element
2343  // of the result is one of the 4 elements of the right vector.
2344  Swap = true;
2345  ShiftElts = M0 - 4;
2346  }
2347 
2348  return true;
2349  }
2350 }
2351 
2353  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2354 
2355  if (!isNByteElemShuffleMask(N, Width, -1))
2356  return false;
2357 
2358  for (int i = 0; i < 16; i += Width)
2359  if (N->getMaskElt(i) != i + Width - 1)
2360  return false;
2361 
2362  return true;
2363 }
2364 
2366  return isXXBRShuffleMaskHelper(N, 2);
2367 }
2368 
2370  return isXXBRShuffleMaskHelper(N, 4);
2371 }
2372 
2374  return isXXBRShuffleMaskHelper(N, 8);
2375 }
2376 
2378  return isXXBRShuffleMaskHelper(N, 16);
2379 }
2380 
2381 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2382 /// if the inputs to the instruction should be swapped and set \p DM to the
2383 /// value for the immediate.
2384 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2385 /// AND element 0 of the result comes from the first input (LE) or second input
2386 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2387 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2388 /// mask.
2390  bool &Swap, bool IsLE) {
2391  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2392 
2393  // Ensure each byte index of the double word is consecutive.
2394  if (!isNByteElemShuffleMask(N, 8, 1))
2395  return false;
2396 
2397  unsigned M0 = N->getMaskElt(0) / 8;
2398  unsigned M1 = N->getMaskElt(8) / 8;
2399  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2400 
2401  // If both vector operands for the shuffle are the same vector, the mask will
2402  // contain only elements from the first one and the second one will be undef.
2403  if (N->getOperand(1).isUndef()) {
2404  if ((M0 | M1) < 2) {
2405  DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2406  Swap = false;
2407  return true;
2408  } else
2409  return false;
2410  }
2411 
2412  if (IsLE) {
2413  if (M0 > 1 && M1 < 2) {
2414  Swap = false;
2415  } else if (M0 < 2 && M1 > 1) {
2416  M0 = (M0 + 2) % 4;
2417  M1 = (M1 + 2) % 4;
2418  Swap = true;
2419  } else
2420  return false;
2421 
2422  // Note: if control flow comes here that means Swap is already set above
2423  DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2424  return true;
2425  } else { // BE
2426  if (M0 < 2 && M1 > 1) {
2427  Swap = false;
2428  } else if (M0 > 1 && M1 < 2) {
2429  M0 = (M0 + 2) % 4;
2430  M1 = (M1 + 2) % 4;
2431  Swap = true;
2432  } else
2433  return false;
2434 
2435  // Note: if control flow comes here that means Swap is already set above
2436  DM = (M0 << 1) + (M1 & 1);
2437  return true;
2438  }
2439 }
2440 
2441 
2442 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2443 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2444 /// elements are counted from the left of the vector register).
2445 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2446  SelectionDAG &DAG) {
2447  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2448  assert(isSplatShuffleMask(SVOp, EltSize));
2449  EVT VT = SVOp->getValueType(0);
2450 
2451  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2452  return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2453  : SVOp->getMaskElt(0);
2454 
2455  if (DAG.getDataLayout().isLittleEndian())
2456  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2457  else
2458  return SVOp->getMaskElt(0) / EltSize;
2459 }
2460 
2461 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2462 /// by using a vspltis[bhw] instruction of the specified element size, return
2463 /// the constant being splatted. The ByteSize field indicates the number of
2464 /// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal;

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      // Multiple is 2 or 4 (asserted above), so i & (Multiple-1) is
      // i % Multiple: chunk-relative position of this BV operand.
      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      // A small positive tail zero-extends into the wider element.
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      // A small negative tail sign-extends into the wider element.
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
2564 
2565 //===----------------------------------------------------------------------===//
2566 // Addressing Mode Selection
2567 //===----------------------------------------------------------------------===//
2568 
2569 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2570 /// or 64-bit immediate, and if the value can be accurately represented as a
2571 /// sign extension from a 16-bit value. If so, this returns true and the
2572 /// immediate.
2574  if (!isa<ConstantSDNode>(N))
2575  return false;
2576 
2577  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2578  if (N->getValueType(0) == MVT::i32)
2579  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2580  else
2581  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2582 }
2584  return isIntS16Immediate(Op.getNode(), Imm);
2585 }
2586 
2587 /// Used when computing address flags for selecting loads and stores.
2588 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2589 /// An OR of two provably disjoint values is equivalent to an ADD.
2590 /// Most PPC load/store instructions compute the effective address as a sum,
2591 /// so doing this conversion is useful.
2592 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2593  if (N.getOpcode() != ISD::OR)
2594  return false;
2595  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2596  if (!LHSKnown.Zero.getBoolValue())
2597  return false;
2598  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2599  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2600 }
2601 
2602 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2603 /// be represented as an indexed [r+r] operation.
2605  SDValue &Index,
2606  SelectionDAG &DAG) const {
2607  for (SDNode *U : N->uses()) {
2608  if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2609  if (Memop->getMemoryVT() == MVT::f64) {
2610  Base = N.getOperand(0);
2611  Index = N.getOperand(1);
2612  return true;
2613  }
2614  }
2615  }
2616  return false;
2617 }
2618 
2619 /// isIntS34Immediate - This method tests if value of node given can be
2620 /// accurately represented as a sign extension from a 34-bit value. If so,
2621 /// this returns true and the immediate.
2623  if (!isa<ConstantSDNode>(N))
2624  return false;
2625 
2626  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2627  return isInt<34>(Imm);
2628 }
2630  return isIntS34Immediate(Op.getNode(), Imm);
2631 }
2632 
2633 /// SelectAddressRegReg - Given the specified address, check to see if it
2634 /// can be represented as an indexed [r+r] operation. Returns false if it
2635 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2636 /// non-zero and N can be represented by a base register plus a signed 16-bit
2637 /// displacement, make a more precise judgement by checking (displacement % \p
2638 /// EncodingAlignment).
2641  MaybeAlign EncodingAlignment) const {
2642  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2643  // a [pc+imm].
2644  if (SelectAddressPCRel(N, Base))
2645  return false;
2646 
2647  int16_t Imm = 0;
2648  if (N.getOpcode() == ISD::ADD) {
2649  // Is there any SPE load/store (f64), which can't handle 16bit offset?
2650  // SPE load/store can only handle 8-bit offsets.
2651  if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2652  return true;
2653  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2654  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2655  return false; // r+i
2656  if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2657  return false; // r+i
2658 
2659  Base = N.getOperand(0);
2660  Index = N.getOperand(1);
2661  return true;
2662  } else if (N.getOpcode() == ISD::OR) {
2663  if (isIntS16Immediate(N.getOperand(1), Imm) &&
2664  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2665  return false; // r+i can fold it if we can.
2666 
2667  // If this is an or of disjoint bitfields, we can codegen this as an add
2668  // (for better address arithmetic) if the LHS and RHS of the OR are provably
2669  // disjoint.
2670  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671 
2672  if (LHSKnown.Zero.getBoolValue()) {
2673  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674  // If all of the bits are known zero on the LHS or RHS, the add won't
2675  // carry.
2676  if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2677  Base = N.getOperand(0);
2678  Index = N.getOperand(1);
2679  return true;
2680  }
2681  }
2682  }
2683 
2684  return false;
2685 }
2686 
2687 // If we happen to be doing an i64 load or store into a stack slot that has
2688 // less than a 4-byte alignment, then the frame-index elimination may need to
2689 // use an indexed load or store instruction (because the offset may not be a
2690 // multiple of 4). The extra register needed to hold the offset comes from the
2691 // register scavenger, and it is possible that the scavenger will need to use
2692 // an emergency spill slot. As a result, we need to make sure that a spill slot
2693 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2694 // stack slot.
2695 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2696  // FIXME: This does not handle the LWA case.
2697  if (VT != MVT::i64)
2698  return;
2699 
2700  // NOTE: We'll exclude negative FIs here, which come from argument
2701  // lowering, because there are no known test cases triggering this problem
2702  // using packed structures (or similar). We can remove this exclusion if
2703  // we find such a test case. The reason why this is so test-case driven is
2704  // because this entire 'fixup' is only to prevent crashes (from the
2705  // register scavenger) on not-really-valid inputs. For example, if we have:
2706  // %a = alloca i1
2707  // %b = bitcast i1* %a to i64*
2708  // store i64* a, i64 b
2709  // then the store should really be marked as 'align 1', but is not. If it
2710  // were marked as 'align 1' then the indexed form would have been
2711  // instruction-selected initially, and the problem this 'fixup' is preventing
2712  // won't happen regardless.
2713  if (FrameIdx < 0)
2714  return;
2715 
2716  MachineFunction &MF = DAG.getMachineFunction();
2717  MachineFrameInfo &MFI = MF.getFrameInfo();
2718 
2719  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2720  return;
2721 
2722  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2723  FuncInfo->setHasNonRISpills();
2724 }
2725 
2726 /// Returns true if the address N can be represented by a base register plus
2727 /// a signed 16-bit displacement [r+imm], and if it is not better
2728 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2729 /// displacements that are multiples of that value.
2731  SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2732  MaybeAlign EncodingAlignment) const {
2733  // FIXME dl should come from parent load or store, not from address
2734  SDLoc dl(N);
2735 
2736  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2737  // a [pc+imm].
2738  if (SelectAddressPCRel(N, Base))
2739  return false;
2740 
2741  // If this can be more profitably realized as r+r, fail.
2742  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2743  return false;
2744 
2745  if (N.getOpcode() == ISD::ADD) {
2746  int16_t imm = 0;
2747  if (isIntS16Immediate(N.getOperand(1), imm) &&
2748  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2749  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2750  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2751  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2752  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2753  } else {
2754  Base = N.getOperand(0);
2755  }
2756  return true; // [r+i]
2757  } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2758  // Match LOAD (ADD (X, Lo(G))).
2759  assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2760  && "Cannot handle constant offsets yet!");
2761  Disp = N.getOperand(1).getOperand(0); // The global address.
2764  Disp.getOpcode() == ISD::TargetConstantPool ||
2765  Disp.getOpcode() == ISD::TargetJumpTable);
2766  Base = N.getOperand(0);
2767  return true; // [&g+r]
2768  }
2769  } else if (N.getOpcode() == ISD::OR) {
2770  int16_t imm = 0;
2771  if (isIntS16Immediate(N.getOperand(1), imm) &&
2772  (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2773  // If this is an or of disjoint bitfields, we can codegen this as an add
2774  // (for better address arithmetic) if the LHS and RHS of the OR are
2775  // provably disjoint.
2776  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2777 
2778  if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2779  // If all of the bits are known zero on the LHS or RHS, the add won't
2780  // carry.
2781  if (FrameIndexSDNode *FI =
2782  dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2783  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2784  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2785  } else {
2786  Base = N.getOperand(0);
2787  }
2788  Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2789  return true;
2790  }
2791  }
2792  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2793  // Loading from a constant address.
2794 
2795  // If this address fits entirely in a 16-bit sext immediate field, codegen
2796  // this as "d, 0"
2797  int16_t Imm;
2798  if (isIntS16Immediate(CN, Imm) &&
2799  (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2800  Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2801  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2802  CN->getValueType(0));
2803  return true;
2804  }
2805 
2806  // Handle 32-bit sext immediates with LIS + addr mode.
2807  if ((CN->getValueType(0) == MVT::i32 ||
2808  (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2809  (!EncodingAlignment ||
2810  isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2811  int Addr = (int)CN->getZExtValue();
2812 
2813  // Otherwise, break this down into an LIS + disp.
2814  Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2815 
2816  Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2817  MVT::i32);
2818  unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2819  Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2820  return true;
2821  }
2822  }
2823 
2824  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2825  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2826  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827  fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828  } else
2829  Base = N;
2830  return true; // [r+0]
2831 }
2832 
2833 /// Similar to the 16-bit case but for instructions that take a 34-bit
2834 /// displacement field (prefixed loads/stores).
2836  SDValue &Base,
2837  SelectionDAG &DAG) const {
2838  // Only on 64-bit targets.
2839  if (N.getValueType() != MVT::i64)
2840  return false;
2841 
2842  SDLoc dl(N);
2843  int64_t Imm = 0;
2844 
2845  if (N.getOpcode() == ISD::ADD) {
2846  if (!isIntS34Immediate(N.getOperand(1), Imm))
2847  return false;
2848  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2849  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2850  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2851  else
2852  Base = N.getOperand(0);
2853  return true;
2854  }
2855 
2856  if (N.getOpcode() == ISD::OR) {
2857  if (!isIntS34Immediate(N.getOperand(1), Imm))
2858  return false;
2859  // If this is an or of disjoint bitfields, we can codegen this as an add
2860  // (for better address arithmetic) if the LHS and RHS of the OR are
2861  // provably disjoint.
2862  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2863  if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2864  return false;
2865  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2866  Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867  else
2868  Base = N.getOperand(0);
2869  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2870  return true;
2871  }
2872 
2873  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2874  Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2875  Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2876  return true;
2877  }
2878 
2879  return false;
2880 }
2881 
2882 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2883 /// represented as an indexed [r+r] operation.
2885  SDValue &Index,
2886  SelectionDAG &DAG) const {
2887  // Check to see if we can easily represent this as an [r+r] address. This
2888  // will fail if it thinks that the address is more profitably represented as
2889  // reg+imm, e.g. where imm = 0.
2890  if (SelectAddressRegReg(N, Base, Index, DAG))
2891  return true;
2892 
2893  // If the address is the result of an add, we will utilize the fact that the
2894  // address calculation includes an implicit add. However, we can reduce
2895  // register pressure if we do not materialize a constant just for use as the
2896  // index register. We only get rid of the add if it is not an add of a
2897  // value and a 16-bit signed constant and both have a single use.
2898  int16_t imm = 0;
2899  if (N.getOpcode() == ISD::ADD &&
2900  (!isIntS16Immediate(N.getOperand(1), imm) ||
2901  !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2902  Base = N.getOperand(0);
2903  Index = N.getOperand(1);
2904  return true;
2905  }
2906 
2907  // Otherwise, do it the hard way, using R0 as the base register.
2908  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2909  N.getValueType());
2910  Index = N;
2911  return true;
2912 }
2913 
2914 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2915  Ty *PCRelCand = dyn_cast<Ty>(N);
2916  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2917 }
2918 
2919 /// Returns true if this address is a PC Relative address.
2920 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2921 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2923  // This is a materialize PC Relative node. Always select this as PC Relative.
2924  Base = N;
2925  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2926  return true;
2927  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2928  isValidPCRelNode<GlobalAddressSDNode>(N) ||
2929  isValidPCRelNode<JumpTableSDNode>(N) ||
2930  isValidPCRelNode<BlockAddressSDNode>(N))
2931  return true;
2932  return false;
2933 }
2934 
2935 /// Returns true if we should use a direct load into vector instruction
2936 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2938 
2939  // If there are any other uses other than scalar to vector, then we should
2940  // keep it as a scalar load -> direct move pattern to prevent multiple
2941  // loads.
2942  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2943  if (!LD)
2944  return false;
2945 
2946  EVT MemVT = LD->getMemoryVT();
2947  if (!MemVT.isSimple())
2948  return false;
2949  switch(MemVT.getSimpleVT().SimpleTy) {
2950  case MVT::i64:
2951  break;
2952  case MVT::i32:
2953  if (!ST.hasP8Vector())
2954  return false;
2955  break;
2956  case MVT::i16:
2957  case MVT::i8:
2958  if (!ST.hasP9Vector())
2959  return false;
2960  break;
2961  default:
2962  return false;
2963  }
2964 
2965  SDValue LoadedVal(N, 0);
2966  if (!LoadedVal.hasOneUse())
2967  return false;
2968 
2969  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2970  UI != UE; ++UI)
2971  if (UI.getUse().get().getResNo() == 0 &&
2972  UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2973  UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2974  return false;
2975 
2976  return true;
2977 }
2978 
2979 /// getPreIndexedAddressParts - returns true by value, base pointer and
2980 /// offset pointer and addressing mode by reference if the node's address
2981 /// can be legally represented as pre-indexed load / store address.
2983  SDValue &Offset,
2984  ISD::MemIndexedMode &AM,
2985  SelectionDAG &DAG) const {
2986  if (DisablePPCPreinc) return false;
2987 
2988  bool isLoad = true;
2989  SDValue Ptr;
2990  EVT VT;
2991  Align Alignment;
2992  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2993  Ptr = LD->getBasePtr();
2994  VT = LD->getMemoryVT();
2995  Alignment = LD->getAlign();
2996  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2997  Ptr = ST->getBasePtr();
2998  VT = ST->getMemoryVT();
2999  Alignment = ST->getAlign();
3000  isLoad = false;
3001  } else
3002  return false;
3003 
3004  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3005  // instructions because we can fold these into a more efficient instruction
3006  // instead, (such as LXSD).
3007  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3008  return false;
3009  }
3010 
3011  // PowerPC doesn't have preinc load/store instructions for vectors
3012  if (VT.isVector())
3013  return false;
3014 
3015  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3016  // Common code will reject creating a pre-inc form if the base pointer
3017  // is a frame index, or if N is a store and the base pointer is either
3018  // the same as or a predecessor of the value being stored. Check for
3019  // those situations here, and try with swapped Base/Offset instead.
3020  bool Swap = false;
3021 
3022  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3023  Swap = true;
3024  else if (!isLoad) {
3025  SDValue Val = cast<StoreSDNode>(N)->getValue();
3026  if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3027  Swap = true;
3028  }
3029 
3030  if (Swap)
3031  std::swap(Base, Offset);
3032 
3033  AM = ISD::PRE_INC;
3034  return true;
3035  }
3036 
3037  // LDU/STU can only handle immediates that are a multiple of 4.
3038  if (VT != MVT::i64) {
3039  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
3040  return false;
3041  } else {
3042  // LDU/STU need an address with at least 4-byte alignment.
3043  if (Alignment < Align(4))
3044  return false;
3045 
3046  if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3047  return false;
3048  }
3049 
3050  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3051  // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3052  // sext i32 to i64 when addr mode is r+i.
3053  if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3054  LD->getExtensionType() == ISD::SEXTLOAD &&
3055  isa<ConstantSDNode>(Offset))
3056  return false;
3057  }
3058 
3059  AM = ISD::PRE_INC;
3060  return true;
3061 }
3062 
3063 //===----------------------------------------------------------------------===//
3064 // LowerOperation implementation
3065 //===----------------------------------------------------------------------===//
3066 
3067 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3068 /// and LoOpFlags to the target MO flags.
3069 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3070  unsigned &HiOpFlags, unsigned &LoOpFlags,
3071  const GlobalValue *GV = nullptr) {
3072  HiOpFlags = PPCII::MO_HA;
3073  LoOpFlags = PPCII::MO_LO;
3074 
3075  // Don't use the pic base if not in PIC relocation model.
3076  if (IsPIC) {
3077  HiOpFlags |= PPCII::MO_PIC_FLAG;
3078  LoOpFlags |= PPCII::MO_PIC_FLAG;
3079  }
3080 }
3081 
3082 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3083  SelectionDAG &DAG) {
3084  SDLoc DL(HiPart);
3085  EVT PtrVT = HiPart.getValueType();
3086  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3087 
3088  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3089  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3090 
3091  // With PIC, the first instruction is actually "GR+hi(&G)".
3092  if (isPIC)
3093  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3094  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3095 
3096  // Generate non-pic code that has direct accesses to the constant pool.
3097  // The address of the global is just (hi(&g)+lo(&g)).
3098  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3099 }
3100 
3102  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3103  FuncInfo->setUsesTOCBasePtr();
3104 }
3105 
3106 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3108 }
3109 
3110 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3111  SDValue GA) const {
3112  const bool Is64Bit = Subtarget.isPPC64();
3113  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3114  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3115  : Subtarget.isAIXABI()
3116  ? DAG.getRegister(PPC::R2, VT)
3117  : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3118  SDValue Ops[] = { GA, Reg };
3119  return DAG.getMemIntrinsicNode(
3120  PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3123 }
3124 
3125 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3126  SelectionDAG &DAG) const {
3127  EVT PtrVT = Op.getValueType();
3128  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3129  const Constant *C = CP->getConstVal();
3130 
3131  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3132  // The actual address of the GlobalValue is stored in the TOC.
3133  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3134  if (Subtarget.isUsingPCRelativeCalls()) {
3135  SDLoc DL(CP);
3136  EVT Ty = getPointerTy(DAG.getDataLayout());
3137  SDValue ConstPool = DAG.getTargetConstantPool(
3138  C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3139  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3140  }
3141  setUsesTOCBasePtr(DAG);
3142  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3143  return getTOCEntry(DAG, SDLoc(CP), GA);
3144  }
3145 
3146  unsigned MOHiFlag, MOLoFlag;
3147  bool IsPIC = isPositionIndependent();
3148  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3149 
3150  if (IsPIC && Subtarget.isSVR4ABI()) {
3151  SDValue GA =
3152  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3153  return getTOCEntry(DAG, SDLoc(CP), GA);
3154  }
3155 
3156  SDValue CPIHi =
3157  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3158  SDValue CPILo =
3159  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3160  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3161 }
3162 
3163 // For 64-bit PowerPC, prefer the more compact relative encodings.
3164 // This trades 32 bits per jump table entry for one or two instructions
3165 // on the jump site.
3167  if (isJumpTableRelative())
3169 
3171 }
3172 
3175  return false;
3176  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3177  return true;
3179 }
3180 
3182  SelectionDAG &DAG) const {
3183  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3184  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3185 
3186  switch (getTargetMachine().getCodeModel()) {
3187  case CodeModel::Small:
3188  case CodeModel::Medium:
3189  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3190  default:
3191  return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3192  getPointerTy(DAG.getDataLayout()));
3193  }
3194 }
3195 
3196 const MCExpr *
3198  unsigned JTI,
3199  MCContext &Ctx) const {
3200  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3201  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3202 
3203  switch (getTargetMachine().getCodeModel()) {
3204  case CodeModel::Small:
3205  case CodeModel::Medium:
3206  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3207  default:
3208  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3209  }
3210 }
3211 
3212 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3213  EVT PtrVT = Op.getValueType();
3214  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3215 
3216  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3217  if (Subtarget.isUsingPCRelativeCalls()) {
3218  SDLoc DL(JT);
3219  EVT Ty = getPointerTy(DAG.getDataLayout());
3220  SDValue GA =
3221  DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3222  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3223  return MatAddr;
3224  }
3225 
3226  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3227  // The actual address of the GlobalValue is stored in the TOC.
3228  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3229  setUsesTOCBasePtr(DAG);
3230  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3231  return getTOCEntry(DAG, SDLoc(JT), GA);
3232  }
3233 
3234  unsigned MOHiFlag, MOLoFlag;
3235  bool IsPIC = isPositionIndependent();
3236  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3237 
3238  if (IsPIC && Subtarget.isSVR4ABI()) {
3239  SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3241  return getTOCEntry(DAG, SDLoc(GA), GA);
3242  }
3243 
3244  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3245  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3246  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3247 }
3248 
3249 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3250  SelectionDAG &DAG) const {
3251  EVT PtrVT = Op.getValueType();
3252  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3253  const BlockAddress *BA = BASDN->getBlockAddress();
3254 
3255  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3256  if (Subtarget.isUsingPCRelativeCalls()) {
3257  SDLoc DL(BASDN);
3258  EVT Ty = getPointerTy(DAG.getDataLayout());
3259  SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3261  SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3262  return MatAddr;
3263  }
3264 
3265  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3266  // The actual BlockAddress is stored in the TOC.
3267  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3268  setUsesTOCBasePtr(DAG);
3269  SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3270  return getTOCEntry(DAG, SDLoc(BASDN), GA);
3271  }
3272 
3273  // 32-bit position-independent ELF stores the BlockAddress in the .got.
3274  if (Subtarget.is32BitELFABI() && isPositionIndependent())
3275  return getTOCEntry(
3276  DAG, SDLoc(BASDN),
3277  DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3278 
3279  unsigned MOHiFlag, MOLoFlag;
3280  bool IsPIC = isPositionIndependent();
3281  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3282  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3283  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3284  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3285 }
3286 
3287 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3288  SelectionDAG &DAG) const {
3289  if (Subtarget.isAIXABI())
3290  return LowerGlobalTLSAddressAIX(Op, DAG);
3291 
3292  return LowerGlobalTLSAddressLinux(Op, DAG);
3293 }
3294 
3295 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3296  SelectionDAG &DAG) const {
3297  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3298 
3299  if (DAG.getTarget().useEmulatedTLS())
3300  report_fatal_error("Emulated TLS is not yet supported on AIX");
3301 
3302  SDLoc dl(GA);
3303  const GlobalValue *GV = GA->getGlobal();
3304  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3305 
3306  // The general-dynamic model is the only access model supported for now, so
3307  // all the GlobalTLSAddress nodes are lowered with this model.
3308  // We need to generate two TOC entries, one for the variable offset, one for
3309  // the region handle. The global address for the TOC entry of the region
3310  // handle is created with the MO_TLSGDM_FLAG flag and the global address
3311  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3312  SDValue VariableOffsetTGA =
3313  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3314  SDValue RegionHandleTGA =
3315  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3316  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3317  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3318  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3319  RegionHandle);
3320 }
3321 
3322 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3323  SelectionDAG &DAG) const {
3324  // FIXME: TLS addresses currently use medium model code sequences,
3325  // which is the most useful form. Eventually support for small and
3326  // large models could be added if users need it, at the cost of
3327  // additional complexity.
3328  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3329  if (DAG.getTarget().useEmulatedTLS())
3330  return LowerToTLSEmulatedModel(GA, DAG);
3331 
3332  SDLoc dl(GA);
3333  const GlobalValue *GV = GA->getGlobal();
3334  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3335  bool is64bit = Subtarget.isPPC64();
3336  const Module *M = DAG.getMachineFunction().getFunction().getParent();
3337  PICLevel::Level picLevel = M->getPICLevel();
3338 
3339  const TargetMachine &TM = getTargetMachine();
3340  TLSModel::Model Model = TM.getTLSModel(GV);
3341 
3342  if (Model == TLSModel::LocalExec) {
3343  if (Subtarget.isUsingPCRelativeCalls()) {
3344  SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3345  SDValue TGA = DAG.getTargetGlobalAddress(
3346  GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3347  SDValue MatAddr =
3348  DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3349  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3350  }
3351 
3352  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3354  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3356  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3357  : DAG.getRegister(PPC::R2, MVT::i32);
3358 
3359  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3360  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3361  }
3362 
3363  if (Model == TLSModel::InitialExec) {
3364  bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3365  SDValue TGA = DAG.getTargetGlobalAddress(
3366  GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3367  SDValue TGATLS = DAG.getTargetGlobalAddress(
3368  GV, dl, PtrVT, 0,
3370  SDValue TPOffset;
3371  if (IsPCRel) {
3372  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3373  TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3374  MachinePointerInfo());
3375  } else {
3376  SDValue GOTPtr;
3377  if (is64bit) {
3378  setUsesTOCBasePtr(DAG);
3379  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3380  GOTPtr =
3381  DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3382  } else {
3383  if (!TM.isPositionIndependent())
3384  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3385  else if (picLevel == PICLevel::SmallPIC)
3386  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3387  else
3388  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3389  }
3390  TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3391  }
3392  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3393  }
3394 
3396  if (Subtarget.isUsingPCRelativeCalls()) {
3397  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3399  return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3400  }
3401 
3402  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3403  SDValue GOTPtr;
3404  if (is64bit) {
3405  setUsesTOCBasePtr(DAG);
3406  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3407  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3408  GOTReg, TGA);
3409  } else {
3410  if (picLevel == PICLevel::SmallPIC)
3411  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3412  else
3413  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3414  }
3415  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3416  GOTPtr, TGA, TGA);
3417  }
3418 
3419  if (Model == TLSModel::LocalDynamic) {
3420  if (Subtarget.isUsingPCRelativeCalls()) {
3421  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3423  SDValue MatPCRel =
3424  DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3425  return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3426  }
3427 
3428  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3429  SDValue GOTPtr;
3430  if (is64bit) {
3431  setUsesTOCBasePtr(DAG);
3432  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3433  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3434  GOTReg, TGA);
3435  } else {
3436  if (picLevel == PICLevel::SmallPIC)
3437  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3438  else
3439  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3440  }
3441  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3442  PtrVT, GOTPtr, TGA, TGA);
3443  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3444  PtrVT, TLSAddr, TGA);
3445  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3446  }
3447 
3448  llvm_unreachable("Unknown TLS model!");
3449 }
3450 
3451 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3452  SelectionDAG &DAG) const {
3453  EVT PtrVT = Op.getValueType();
3454  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3455  SDLoc DL(GSDN);
3456  const GlobalValue *GV = GSDN->getGlobal();
3457 
3458  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3459  // The actual address of the GlobalValue is stored in the TOC.
3460  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3461  if (Subtarget.isUsingPCRelativeCalls()) {
3462  EVT Ty = getPointerTy(DAG.getDataLayout());
3463  if (isAccessedAsGotIndirect(Op)) {
3464  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3467  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3468  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3469  MachinePointerInfo());
3470  return Load;
3471  } else {
3472  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3474  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3475  }
3476  }
3477  setUsesTOCBasePtr(DAG);
3478  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3479  return getTOCEntry(DAG, DL, GA);
3480  }
3481 
3482  unsigned MOHiFlag, MOLoFlag;
3483  bool IsPIC = isPositionIndependent();
3484  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3485 
3486  if (IsPIC && Subtarget.isSVR4ABI()) {
3487  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3488  GSDN->getOffset(),
3490  return getTOCEntry(DAG, DL, GA);
3491  }
3492 
3493  SDValue GAHi =
3494  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3495  SDValue GALo =
3496  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3497 
3498  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3499 }
3500 
3501 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3502  bool IsStrict = Op->isStrictFPOpcode();
3503  ISD::CondCode CC =
3504  cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3505  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3506  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3507  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3508  EVT LHSVT = LHS.getValueType();
3509  SDLoc dl(Op);
3510 
3511  // Soften the setcc with libcall if it is fp128.
3512  if (LHSVT == MVT::f128) {
3513  assert(!Subtarget.hasP9Vector() &&
3514  "SETCC for f128 is already legal under Power9!");
3515  softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3516  Op->getOpcode() == ISD::STRICT_FSETCCS);
3517  if (RHS.getNode())
3518  LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3519  DAG.getCondCode(CC));
3520  if (IsStrict)
3521  return DAG.getMergeValues({LHS, Chain}, dl);
3522  return LHS;
3523  }
3524 
3525  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3526 
3527  if (Op.getValueType() == MVT::v2i64) {
3528  // When the operands themselves are v2i64 values, we need to do something
3529  // special because VSX has no underlying comparison operations for these.
3530  if (LHS.getValueType() == MVT::v2i64) {
3531  // Equality can be handled by casting to the legal type for Altivec
3532  // comparisons, everything else needs to be expanded.
3533  if (CC != ISD::SETEQ && CC != ISD::SETNE)
3534  return SDValue();
3535  SDValue SetCC32 = DAG.getSetCC(
3536  dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3537  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3538  int ShuffV[] = {1, 0, 3, 2};
3539  SDValue Shuff =
3540  DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3541  return DAG.getBitcast(MVT::v2i64,
3542  DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3543  dl, MVT::v4i32, Shuff, SetCC32));
3544  }
3545 
3546  // We handle most of these in the usual way.
3547  return Op;
3548  }
3549 
3550  // If we're comparing for equality to zero, expose the fact that this is
3551  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3552  // fold the new nodes.
3553  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3554  return V;
3555 
3556  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3557  // Leave comparisons against 0 and -1 alone for now, since they're usually
3558  // optimized. FIXME: revisit this when we can custom lower all setcc
3559  // optimizations.
3560  if (C->isAllOnes() || C->isZero())
3561  return SDValue();
3562  }
3563 
3564  // If we have an integer seteq/setne, turn it into a compare against zero
3565  // by xor'ing the rhs with the lhs, which is faster than setting a
3566  // condition register, reading it back out, and masking the correct bit. The
3567  // normal approach here uses sub to do this instead of xor. Using xor exposes
3568  // the result to other bit-twiddling opportunities.
3569  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3570  EVT VT = Op.getValueType();
3571  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3572  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3573  }
3574  return SDValue();
3575 }
3576 
3577 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3578  SDNode *Node = Op.getNode();
3579  EVT VT = Node->getValueType(0);
3580  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3581  SDValue InChain = Node->getOperand(0);
3582  SDValue VAListPtr = Node->getOperand(1);
3583  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3584  SDLoc dl(Node);
3585 
3586  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3587 
3588  // gpr_index
3589  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3590  VAListPtr, MachinePointerInfo(SV), MVT::i8);
3591  InChain = GprIndex.getValue(1);
3592 
3593  if (VT == MVT::i64) {
3594  // Check if GprIndex is even
3595  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3596  DAG.getConstant(1, dl, MVT::i32));
3597  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3598  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3599  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3600  DAG.getConstant(1, dl, MVT::i32));
3601  // Align GprIndex to be even if it isn't
3602  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3603  GprIndex);
3604  }
3605 
3606  // fpr index is 1 byte after gpr
3607  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3608  DAG.getConstant(1, dl, MVT::i32));
3609 
3610  // fpr
3611  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3612  FprPtr, MachinePointerInfo(SV), MVT::i8);
3613  InChain = FprIndex.getValue(1);
3614 
3615  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3616  DAG.getConstant(8, dl, MVT::i32));
3617 
3618  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3619  DAG.getConstant(4, dl, MVT::i32));
3620 
3621  // areas
3622  SDValue OverflowArea =
3623  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3624  InChain = OverflowArea.getValue(1);
3625 
3626  SDValue RegSaveArea =
3627  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3628  InChain = RegSaveArea.getValue(1);
3629 
3630  // select overflow_area if index > 8
3631  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3632  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3633 
3634  // adjustment constant gpr_index * 4/8
3635  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3636  VT.isInteger() ? GprIndex : FprIndex,
3637  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3638  MVT::i32));
3639 
3640  // OurReg = RegSaveArea + RegConstant
3641  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3642  RegConstant);
3643 
3644  // Floating types are 32 bytes into RegSaveArea
3645  if (VT.isFloatingPoint())
3646  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3647  DAG.getConstant(32, dl, MVT::i32));
3648 
3649  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3650  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3651  VT.isInteger() ? GprIndex : FprIndex,
3652  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3653  MVT::i32));
3654 
3655  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3656  VT.isInteger() ? VAListPtr : FprPtr,
3658 
3659  // determine if we should load from reg_save_area or overflow_area
3660  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3661 
3662  // increase overflow_area by 4/8 if gpr/fpr > 8
3663  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3664  DAG.getConstant(VT.isInteger() ? 4 : 8,
3665  dl, MVT::i32));
3666 
3667  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3668  OverflowAreaPlusN);
3669 
3670  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3672 
3673  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3674 }
3675 
3676 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3677  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3678 
3679  // We have to copy the entire va_list struct:
3680  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3681  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3682  DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3683  false, true, false, MachinePointerInfo(),
3684  MachinePointerInfo());
3685 }
3686 
3687 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3688  SelectionDAG &DAG) const {
3689  if (Subtarget.isAIXABI())
3690  report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3691 
3692  return Op.getOperand(0);
3693 }
3694 
3695 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3696  MachineFunction &MF = DAG.getMachineFunction();
3697  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3698 
3699  assert((Op.getOpcode() == ISD::INLINEASM ||
3700  Op.getOpcode() == ISD::INLINEASM_BR) &&
3701  "Expecting Inline ASM node.");
3702 
3703  // If an LR store is already known to be required then there is not point in
3704  // checking this ASM as well.
3705  if (MFI.isLRStoreRequired())
3706  return Op;
3707 
3708  // Inline ASM nodes have an optional last operand that is an incoming Flag of
3709  // type MVT::Glue. We want to ignore this last operand if that is the case.
3710  unsigned NumOps = Op.getNumOperands();
3711  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3712  --NumOps;
3713 
3714  // Check all operands that may contain the LR.
3715  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3716  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3717  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3718  ++i; // Skip the ID value.
3719 
3720  switch (InlineAsm::getKind(Flags)) {
3721  default:
3722  llvm_unreachable("Bad flags!");
3724  case InlineAsm::Kind_Imm:
3725  case InlineAsm::Kind_Mem:
3726  i += NumVals;
3727  break;
3731  for (; NumVals; --NumVals, ++i) {
3732  Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3733  if (Reg != PPC::LR && Reg != PPC::LR8)
3734  continue;
3735  MFI.setLRStoreRequired();
3736  return Op;
3737  }
3738  break;
3739  }
3740  }
3741  }
3742 
3743  return Op;
3744 }
3745 
3746 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3747  SelectionDAG &DAG) const {
3748  if (Subtarget.isAIXABI())
3749  report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3750 
3751  SDValue Chain = Op.getOperand(0);
3752  SDValue Trmp = Op.getOperand(1); // trampoline
3753  SDValue FPtr = Op.getOperand(2); // nested function
3754  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3755  SDLoc dl(Op);
3756 
3757  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3758  bool isPPC64 = (PtrVT == MVT::i64);
3759  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3760 
3762  TargetLowering::ArgListEntry Entry;
3763 
3764  Entry.Ty = IntPtrTy;
3765  Entry.Node = Trmp; Args.push_back(Entry);
3766 
3767  // TrampSize == (isPPC64 ? 48 : 40);
3768  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3769  isPPC64 ? MVT::i64 : MVT::i32);
3770  Args.push_back(Entry);
3771 
3772  Entry.Node = FPtr; Args.push_back(Entry);
3773  Entry.Node = Nest; Args.push_back(Entry);
3774 
3775  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3777  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3779  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3780 
3781  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3782  return CallResult.second;
3783 }
3784 
3785 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3786  MachineFunction &MF = DAG.getMachineFunction();
3787  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3788  EVT PtrVT = getPointerTy(MF.getDataLayout());
3789 
3790  SDLoc dl(Op);
3791 
3792  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3793  // vastart just stores the address of the VarArgsFrameIndex slot into the
3794  // memory location argument.
3795  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3796  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3797  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3798  MachinePointerInfo(SV));
3799  }
3800 
3801  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3802  // We suppose the given va_list is already allocated.
3803  //
3804  // typedef struct {
3805  // char gpr; /* index into the array of 8 GPRs
3806  // * stored in the register save area
3807  // * gpr=0 corresponds to r3,
3808  // * gpr=1 to r4, etc.
3809  // */
3810  // char fpr; /* index into the array of 8 FPRs
3811  // * stored in the register save area
3812  // * fpr=0 corresponds to f1,
3813  // * fpr=1 to f2, etc.
3814  // */
3815  // char *overflow_arg_area;
3816  // /* location on stack that holds
3817  // * the next overflow argument
3818  // */
3819  // char *reg_save_area;
3820  // /* where r3:r10 and f1:f8 (if saved)
3821  // * are stored
3822  // */
3823  // } va_list[1];
3824 
3825  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3826  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3827  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3828  PtrVT);
3829  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3830  PtrVT);
3831 
3832  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3833  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3834 
3835  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3836  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3837 
3838  uint64_t FPROffset = 1;
3839  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3840 
3841  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3842 
3843  // Store first byte : number of int regs
3844  SDValue firstStore =
3845  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3847  uint64_t nextOffset = FPROffset;
3848  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3849  ConstFPROffset);
3850 
3851  // Store second byte : number of float regs
3852  SDValue secondStore =
3853  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3854  MachinePointerInfo(SV, nextOffset), MVT::i8);
3855  nextOffset += StackOffset;
3856  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3857 
3858  // Store second word : arguments given on stack
3859  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3860  MachinePointerInfo(SV, nextOffset));
3861  nextOffset += FrameOffset;
3862  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3863 
3864  // Store third word : arguments given in registers
3865  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3866  MachinePointerInfo(SV, nextOffset));
3867 }
3868 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.  Thirteen registers, F1 through F13, listed in
/// ascending allocation order.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3874 
3875 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3876 /// the stack.
3877 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3878  unsigned PtrByteSize) {
3879  unsigned ArgSize = ArgVT.getStoreSize();
3880  if (Flags.isByVal())
3881  ArgSize = Flags.getByValSize();
3882 
3883  // Round up to multiples of the pointer size, except for array members,
3884  // which are always packed.
3885  if (!Flags.isInConsecutiveRegs())
3886  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3887 
3888  return ArgSize;
3889 }
3890 
3891 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3892 /// on the stack.
3894  ISD::ArgFlagsTy Flags,
3895  unsigned PtrByteSize) {
3896  Align Alignment(PtrByteSize);
3897 
3898  // Altivec parameters are padded to a 16 byte boundary.
3899  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3900  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3901  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3902  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3903  Alignment = Align(16);
3904 
3905  // ByVal parameters are aligned as requested.
3906  if (Flags.isByVal()) {
3907  auto BVAlign = Flags.getNonZeroByValAlign();
3908  if (BVAlign > PtrByteSize) {
3909  if (BVAlign.value() % PtrByteSize != 0)
3911  "ByVal alignment is not a multiple of the pointer size");
3912 
3913  Alignment = BVAlign;
3914  }
3915  }
3916 
3917  // Array members are always packed to their original alignment.
3918  if (Flags.isInConsecutiveRegs()) {
3919  // If the array member was split into multiple registers, the first
3920  // needs to be aligned to the size of the full type. (Except for
3921  // ppcf128, which is only aligned as its f64 components.)
3922  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3923  Alignment = Align(OrigVT.getStoreSize());
3924  else
3925  Alignment = Align(ArgVT.getStoreSize());
3926  }
3927 
3928  return Alignment;
3929 }
3930 
3931 /// CalculateStackSlotUsed - Return whether this argument will use its
3932 /// stack slot (instead of being passed in registers). ArgOffset,
3933 /// AvailableFPRs, and AvailableVRs must hold the current argument
3934 /// position, and will be updated to account for this argument.
3935 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3936  unsigned PtrByteSize, unsigned LinkageSize,
3937  unsigned ParamAreaSize, unsigned &ArgOffset,
3938  unsigned &AvailableFPRs,
3939  unsigned &AvailableVRs) {
3940  bool UseMemory = false;
3941 
3942  // Respect alignment of argument on the stack.
3943  Align Alignment =
3944  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3945  ArgOffset = alignTo(ArgOffset, Alignment);
3946  // If there's no space left in the argument save area, we must
3947  // use memory (this check also catches zero-sized arguments).
3948  if (ArgOffset >= LinkageSize + ParamAreaSize)
3949  UseMemory = true;
3950 
3951  // Allocate argument on the stack.
3952  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3953  if (Flags.isInConsecutiveRegsLast())
3954  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3955  // If we overran the argument save area, we must use memory
3956  // (this check catches arguments passed partially in memory)
3957  if (ArgOffset > LinkageSize + ParamAreaSize)
3958  UseMemory = true;
3959 
3960  // However, if the argument is actually passed in an FPR or a VR,
3961  // we don't use memory after all.
3962  if (!Flags.isByVal()) {
3963  if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3964  if (AvailableFPRs > 0) {
3965  --AvailableFPRs;
3966  return false;
3967  }
3968  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3969  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3970  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3971  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3972  if (AvailableVRs > 0) {
3973  --AvailableVRs;
3974  return false;
3975  }
3976  }
3977 
3978  return UseMemory;
3979 }
3980 
3981 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3982 /// ensure minimum alignment required for target.
3984  unsigned NumBytes) {
3985  return alignTo(NumBytes, Lowering->getStackAlign());
3986 }
3987 
3988 SDValue PPCTargetLowering::LowerFormalArguments(
3989  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3990  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3991  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3992  if (Subtarget.isAIXABI())
3993  return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3994  InVals);
3995  if (Subtarget.is64BitELFABI())
3996  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3997  InVals);
3998  assert(Subtarget.is32BitELFABI());
3999  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4000  InVals);
4001 }
4002 
4003 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4004  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4005  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4006  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4007 
4008  // 32-bit SVR4 ABI Stack Frame Layout:
4009  // +-----------------------------------+
4010  // +--> | Back chain |
4011  // | +-----------------------------------+
4012  // | | Floating-point register save area |
4013  // | +-----------------------------------+
4014  // | | General register save area |
4015  // | +-----------------------------------+
4016  // | | CR save word |
4017  // | +-----------------------------------+
4018  // | | VRSAVE save word |
4019  // | +-----------------------------------+
4020  // | | Alignment padding |
4021  // | +-----------------------------------+
4022  // | | Vector register save area |
4023  // | +-----------------------------------+
4024  // | | Local variable space |
4025  // | +-----------------------------------+
4026  // | | Parameter list area |
4027  // | +-----------------------------------+
4028  // | | LR save word |
4029  // | +-----------------------------------+
4030  // SP--> +--- | Back chain |
4031  // +-----------------------------------+
4032  //
4033  // Specifications:
4034  // System V Application Binary Interface PowerPC Processor Supplement
4035  // AltiVec Technology Programming Interface Manual
4036 
4037  MachineFunction &MF = DAG.getMachineFunction();
4038  MachineFrameInfo &MFI = MF.getFrameInfo();
4039  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4040 
4041  EVT PtrVT = getPointerTy(MF.getDataLayout());
4042  // Potential tail calls could cause overwriting of argument stack slots.
4043  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4044  (CallConv == CallingConv::Fast));
4045  const Align PtrAlign(4);
4046 
4047  // Assign locations to all of the incoming arguments.
4049  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4050  *DAG.getContext());
4051 
4052  // Reserve space for the linkage area on the stack.
4053  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4054  CCInfo.AllocateStack(LinkageSize, PtrAlign);
4055  if (useSoftFloat())
4056  CCInfo.PreAnalyzeFormalArguments(Ins);
4057 
4058  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4059  CCInfo.clearWasPPCF128();
4060 
4061  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4062  CCValAssign &VA = ArgLocs[i];
4063 
4064  // Arguments stored in registers.
4065  if (VA.isRegLoc()) {
4066  const TargetRegisterClass *RC;
4067  EVT ValVT = VA.getValVT();
4068 
4069  switch (ValVT.getSimpleVT().SimpleTy) {
4070  default:
4071  llvm_unreachable("ValVT not supported by formal arguments Lowering");
4072  case MVT::i1:
4073  case MVT::i32:
4074  RC = &PPC::GPRCRegClass;
4075  break;
4076  case MVT::f32:
4077  if (Subtarget.hasP8Vector())
4078  RC = &PPC::VSSRCRegClass;
4079  else if (Subtarget.hasSPE())
4080  RC = &PPC::GPRCRegClass;
4081  else
4082  RC = &PPC::F4RCRegClass;
4083  break;
4084  case MVT::f64:
4085  if (Subtarget.hasVSX())
4086  RC = &PPC::VSFRCRegClass;
4087  else if (Subtarget.hasSPE())
4088  // SPE passes doubles in GPR pairs.
4089  RC = &PPC::GPRCRegClass;
4090  else
4091  RC = &PPC::F8RCRegClass;
4092  break;
4093  case MVT::v16i8:
4094  case MVT::v8i16:
4095  case MVT::v4i32:
4096  RC = &PPC::VRRCRegClass;
4097  break;
4098  case MVT::v4f32:
4099  RC = &PPC::VRRCRegClass;
4100  break;
4101  case MVT::v2f64:
4102  case MVT::v2i64:
4103  RC = &PPC::VRRCRegClass;
4104  break;
4105  }
4106 
4107  SDValue ArgValue;
4108  // Transform the arguments stored in physical registers into
4109  // virtual ones.
4110  if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4111  assert(i + 1 < e && "No second half of double precision argument");
4112  Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4113  Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4114  SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4115  SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4116  if (!Subtarget.isLittleEndian())
4117  std::swap (ArgValueLo, ArgValueHi);
4118  ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4119  ArgValueHi);
4120  } else {
4121  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4122  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4123  ValVT == MVT::i1 ? MVT::i32 : ValVT);
4124  if (ValVT == MVT::i1)
4125  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4126  }
4127 
4128  InVals.push_back(ArgValue);
4129  } else {
4130  // Argument stored in memory.
4131  assert(VA.isMemLoc());
4132 
4133  // Get the extended size of the argument type in stack
4134  unsigned ArgSize = VA.getLocVT().getStoreSize();
4135  // Get the actual size of the argument type
4136  unsigned ObjSize = VA.getValVT().getStoreSize();
4137  unsigned ArgOffset = VA.getLocMemOffset();
4138  // Stack objects in PPC32 are right justified.
4139  ArgOffset += ArgSize - ObjSize;
4140  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4141 
4142  // Create load nodes to retrieve arguments from the stack.
4143  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4144  InVals.push_back(
4145  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4146  }
4147  }
4148 
4149  // Assign locations to all of the incoming aggregate by value arguments.
4150  // Aggregates passed by value are stored in the local variable space of the
4151  // caller's stack frame, right above the parameter list area.
4152  SmallVector<CCValAssign, 16> ByValArgLocs;
4153  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4154  ByValArgLocs, *DAG.getContext());
4155 
4156  // Reserve stack space for the allocations in CCInfo.
4157  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4158 
4159  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4160 
4161  // Area that is at least reserved in the caller of this function.
4162  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4163  MinReservedArea = std::max(MinReservedArea, LinkageSize);
4164 
4165  // Set the size that is at least reserved in caller of this function. Tail
4166  // call optimized function's reserved stack space needs to be aligned so that
4167  // taking the difference between two stack areas will result in an aligned
4168  // stack.
4169  MinReservedArea =
4170  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4171  FuncInfo->setMinReservedArea(MinReservedArea);
4172 
4173  SmallVector<SDValue, 8> MemOps;
4174 
4175  // If the function takes variable number of arguments, make a frame index for
4176  // the start of the first vararg value... for expansion of llvm.va_start.
4177  if (isVarArg) {
4178  static const MCPhysReg GPArgRegs[] = {
4179  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4180  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4181  };
4182  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
4183 
4184  static const MCPhysReg FPArgRegs[] = {
4185  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4186  PPC::F8
4187  };
4188  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
4189 
4190  if (useSoftFloat() || hasSPE())
4191  NumFPArgRegs = 0;
4192 
4193  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4194  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4195 
4196  // Make room for NumGPArgRegs and NumFPArgRegs.
4197  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4198  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4199 
4200  FuncInfo->setVarArgsStackOffset(
4201  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4202  CCInfo.getNextStackOffset(), true));
4203 
4204  FuncInfo->setVarArgsFrameIndex(
4205  MFI.CreateStackObject(Depth, Align(8), false));
4206  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4207 
4208  // The fixed integer arguments of a variadic function are stored to the
4209  // VarArgsFrameIndex on the stack so that they may be loaded by
4210  // dereferencing the result of va_next.
4211  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4212  // Get an existing live-in vreg, or add a new one.
4213  Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4214  if (!VReg)
4215  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4216 
4217  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4218  SDValue Store =
4219  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4220  MemOps.push_back(Store);
4221  // Increment the address by four for the next argument to store
4222  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4223  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4224  }
4225 
4226  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4227  // is set.
4228  // The double arguments are stored to the VarArgsFrameIndex
4229  // on the stack.
4230  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4231  // Get an existing live-in vreg, or add a new one.
4232  Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4233  if (!VReg)
4234  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4235 
4236  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4237  SDValue Store =
4238  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4239  MemOps.push_back(Store);
4240  // Increment the address by eight for the next argument to store
4241  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4242  PtrVT);
4243  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4244  }
4245  }
4246 
4247  if (!MemOps.empty())
4248  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4249 
4250  return Chain;
4251 }
4252 
4253 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4254 // value to MVT::i64 and then truncate to the correct register size.
4255 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4256  EVT ObjectVT, SelectionDAG &DAG,
4257  SDValue ArgVal,
4258  const SDLoc &dl) const {
4259  if (Flags.isSExt())
4260  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4261  DAG.getValueType(ObjectVT));
4262  else if (Flags.isZExt())
4263  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4264  DAG.getValueType(ObjectVT));
4265 
4266  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4267 }
4268 
4269 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4270  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4271  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4272  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4273  // TODO: add description of PPC stack frame format, or at least some docs.
4274  //
4275  bool isELFv2ABI = Subtarget.isELFv2ABI();
4276  bool isLittleEndian = Subtarget.isLittleEndian();
4277  MachineFunction &MF = DAG.getMachineFunction();
4278  MachineFrameInfo &MFI = MF.getFrameInfo();
4279  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4280 
4281  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4282  "fastcc not supported on varargs functions");
4283 
4284  EVT PtrVT = getPointerTy(MF.getDataLayout());
4285  // Potential tail calls could cause overwriting of argument stack slots.
4286  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4287  (CallConv == CallingConv::Fast));
4288  unsigned PtrByteSize = 8;
4289  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4290 
4291  static const MCPhysReg GPR[] = {
4292  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4293  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4294  };
4295  static const MCPhysReg VR[] = {
4296  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4297  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4298  };
4299 
4300  const unsigned Num_GPR_Regs = array_lengthof(GPR);
4301  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4302  const unsigned Num_VR_Regs = array_lengthof(VR);
4303 
4304  // Do a first pass over the arguments to determine whether the ABI
4305  // guarantees that our caller has allocated the parameter save area
4306  // on its stack frame. In the ELFv1 ABI, this is always the case;
4307  // in the ELFv2 ABI, it is true if this is a vararg function or if
4308  // any parameter is located in a stack slot.
4309 
4310  bool HasParameterArea = !isELFv2ABI || isVarArg;
4311  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4312  unsigned NumBytes = LinkageSize;
4313  unsigned AvailableFPRs = Num_FPR_Regs;
4314  unsigned AvailableVRs = Num_VR_Regs;
4315  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4316  if (Ins[i].Flags.isNest())
4317  continue;
4318 
4319  if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4320  PtrByteSize, LinkageSize, ParamAreaSize,
4321  NumBytes, AvailableFPRs, AvailableVRs))
4322  HasParameterArea = true;
4323  }
4324 
4325  // Add DAG nodes to load the arguments or copy them out of registers. On
4326  // entry to a function on PPC, the arguments start after the linkage area,
4327  // although the first ones are often in registers.
4328 
4329  unsigned ArgOffset = LinkageSize;
4330  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4331  SmallVector<SDValue, 8> MemOps;
4333  unsigned CurArgIdx = 0;
4334  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4335  SDValue ArgVal;
4336  bool needsLoad = false;
4337  EVT ObjectVT = Ins[ArgNo].VT;
4338  EVT OrigVT = Ins[ArgNo].ArgVT;
4339  unsigned ObjSize = ObjectVT.getStoreSize();
4340  unsigned ArgSize = ObjSize;
4341  ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4342  if (Ins[ArgNo].isOrigArg()) {
4343  std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4344  CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4345  }
4346  // We re-align the argument offset for each argument, except when using the
4347  // fast calling convention, when we need to make sure we do that only when
4348  // we'll actually use a stack slot.
4349  unsigned CurArgOffset;
4350  Align Alignment;
4351  auto ComputeArgOffset = [&]() {
4352  /* Respect alignment of argument on the stack. */
4353  Alignment =
4354  CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4355  ArgOffset = alignTo(ArgOffset, Alignment);
4356  CurArgOffset = ArgOffset;
4357  };
4358 
4359  if (CallConv != CallingConv::Fast) {
4360  ComputeArgOffset();
4361 
4362  /* Compute GPR index associated with argument offset. */
4363  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4364  GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4365  }
4366 
4367  // FIXME the codegen can be much improved in some cases.
4368  // We do not have to keep everything in memory.
4369  if (Flags.isByVal()) {
4370  assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4371 
4372  if (CallConv == CallingConv::Fast)
4373  ComputeArgOffset();
4374 
4375  // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4376  ObjSize = Flags.getByValSize();
4377  ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4378  // Empty aggregate parameters do not take up registers. Examples:
4379  // struct { } a;
4380  // union { } b;
4381  // int c[0];
4382  // etc. However, we have to provide a place-holder in InVals, so
4383  // pretend we have an 8-byte item at the current address for that
4384  // purpose.
4385  if (!ObjSize) {
4386  int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4387  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4388  InVals.push_back(FIN);
4389  continue;
4390  }
4391 
4392  // Create a stack object covering all stack doublewords occupied
4393  // by the argument. If the argument is (fully or partially) on
4394  // the stack, or if the argument is fully in registers but the
4395  // caller has allocated the parameter save anyway, we can refer
4396  // directly to the caller's stack frame. Otherwise, create a
4397  // local copy in our own frame.
4398  int FI;
4399  if (HasParameterArea ||
4400  ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4401  FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4402  else
4403  FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4404  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4405 
4406  // Handle aggregates smaller than 8 bytes.
4407  if (ObjSize < PtrByteSize) {
4408  // The value of the object is its address, which differs from the
4409  // address of the enclosing doubleword on big-endian systems.
4410  SDValue Arg = FIN;
4411  if (!isLittleEndian) {
4412  SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4413  Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4414  }
4415  InVals.push_back(Arg);
4416 
4417  if (GPR_idx != Num_GPR_Regs) {
4418  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4419  FuncInfo->addLiveInAttr(VReg, Flags);
4420  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4421  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4422  SDValue Store =
4423  DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4424  MachinePointerInfo(&*FuncArg), ObjType);
4425  MemOps.push_back(Store);
4426  }
4427  // Whether we copied from a register or not, advance the offset
4428  // into the parameter save area by a full doubleword.
4429  ArgOffset += PtrByteSize;
4430  continue;
4431  }
4432 
4433  // The value of the object is its address, which is the address of
4434  // its first stack doubleword.
4435  InVals.push_back(FIN);
4436 
4437  // Store whatever pieces of the object are in registers to memory.
4438  for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4439  if (GPR_idx == Num_GPR_Regs)
4440  break;
4441 
4442  Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4443  FuncInfo->addLiveInAttr(VReg, Flags);
4444  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4445  SDValue Addr = FIN;
4446  if (j) {
4447  SDValue Off = DAG.getConstant(j, dl, PtrVT);
4448  Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4449  }
4450  unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4451  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4452  SDValue Store =
4453  DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4454  MachinePointerInfo(&*FuncArg, j), ObjType);
4455  MemOps.push_back(Store);
4456  ++GPR_idx;
4457  }
4458  ArgOffset += ArgSize;
4459  continue;
4460  }
4461 
4462  switch (ObjectVT.getSimpleVT().SimpleTy) {
4463  default: llvm_unreachable("Unhandled argument type!");
4464  case MVT::i1:
4465  case MVT::i32:
4466  case MVT::i64:
4467  if (Flags.isNest()) {
4468  // The 'nest' parameter, if any, is passed in R11.
4469  Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4470  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4471 
4472  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4473  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4474 
4475  break;
4476  }
4477 
4478  // These can be scalar arguments or elements of an integer array type
4479  // passed directly. Clang may use those instead of "byval" aggregate
4480  // types to avoid forcing arguments to memory unnecessarily.
4481  if (GPR_idx != Num_GPR_Regs) {
4482  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4483  FuncInfo->addLiveInAttr(VReg, Flags);
4484  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4485 
4486  if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4487  // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4488  // value to MVT::i64 and then truncate to the correct register size.
4489  ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4490  } else {
4491  if (CallConv == CallingConv::Fast)
4492  ComputeArgOffset();
4493 
4494  needsLoad = true;
4495  ArgSize = PtrByteSize;
4496  }
4497  if (CallConv != CallingConv::Fast || needsLoad)
4498  ArgOffset += 8;
4499  break;
4500 
4501  case MVT::f32:
4502  case MVT::f64:
4503  // These can be scalar arguments or elements of a float array type
4504  // passed directly. The latter are used to implement ELFv2 homogenous
4505  // float aggregates.
4506  if (FPR_idx != Num_FPR_Regs) {
4507  unsigned VReg;
4508 
4509  if (ObjectVT == MVT::f32)
4510  VReg = MF.addLiveIn(FPR[FPR_idx],
4511  Subtarget.hasP8Vector()
4512  ? &PPC::VSSRCRegClass
4513  : &PPC::F4RCRegClass);
4514  else
4515  VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4516  ? &PPC::VSFRCRegClass
4517  : &PPC::F8RCRegClass);
4518 
4519  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4520  ++FPR_idx;
4521  } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4522  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4523  // once we support fp <-> gpr moves.
4524 
4525  // This can only ever happen in the presence of f32 array types,
4526  // since otherwise we never run out of FPRs before running out
4527  // of GPRs.
4528  Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4529  FuncInfo->addLiveInAttr(VReg, Flags);
4530  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4531 
4532  if (ObjectVT == MVT::f32) {
4533  if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4534  ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4535  DAG.getConstant(32, dl, MVT::i32));
4536  ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4537  }
4538 
4539  ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4540  } else {
4541  if (CallConv == CallingConv::Fast)
4542  ComputeArgOffset();
4543 
4544  needsLoad = true;
4545  }
4546 
4547  // When passing an array of floats, the array occupies consecutive
4548  // space in the argument area; only round up to the next doubleword
4549  // at the end of the array. Otherwise, each float takes 8 bytes.
4550  if (CallConv != CallingConv::Fast || needsLoad) {
4551  ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4552  ArgOffset += ArgSize;
4553  if (Flags.isInConsecutiveRegsLast())
4554  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4555  }
4556  break;
4557  case MVT::v4f32:
4558  case MVT::v4i32:
4559  case MVT::v8i16:
4560  case MVT::v16i8:
4561  case MVT::v2f64:
4562  case MVT::v2i64:
4563  case MVT::v1i128:
4564  case MVT::f128:
4565  // These can be scalar arguments or elements of a vector array type
4566  // passed directly. The latter are used to implement ELFv2 homogenous
4567  // vector aggregates.
4568  if (VR_idx != Num_VR_Regs) {
4569  Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4570  ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4571  ++VR_idx;
4572  } else {
4573  if (CallConv == CallingConv::Fast)
4574  ComputeArgOffset();
4575  needsLoad = true;
4576  }
4577  if (CallConv != CallingConv::Fast || needsLoad)
4578  ArgOffset += 16;
4579  break;
4580  }
4581 
4582  // We need to load the argument to a virtual register if we determined
4583  // above that we ran out of physical registers of the appropriate type.
4584  if (needsLoad) {
4585  if (ObjSize < ArgSize && !isLittleEndian)
4586  CurArgOffset += ArgSize - ObjSize;
4587  int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4588  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4589  ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4590  }
4591 
4592  InVals.push_back(ArgVal);
4593  }
4594 
4595  // Area that is at least reserved in the caller of this function.
4596  unsigned MinReservedArea;
4597  if (HasParameterArea)
4598  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4599  else
4600  MinReservedArea = LinkageSize;
4601 
4602  // Set the size that is at least reserved in caller of this function. Tail
4603  // call optimized functions' reserved stack space needs to be aligned so that
4604  // taking the difference between two stack areas will result in an aligned
4605  // stack.
4606  MinReservedArea =
4607  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4608  FuncInfo->setMinReservedArea(MinReservedArea);
4609 
4610  // If the function takes variable number of arguments, make a frame index for
4611  // the start of the first vararg value... for expansion of llvm.va_start.
4612  // On ELFv2ABI spec, it writes:
4613  // C programs that are intended to be *portable* across different compilers
4614  // and architectures must use the header file <stdarg.h> to deal with variable
4615  // argument lists.
4616  if (isVarArg && MFI.hasVAStart()) {
4617  int Depth = ArgOffset;
4618 
4619  FuncInfo->setVarArgsFrameIndex(
4620  MFI.CreateFixedObject(PtrByteSize, Depth, true));
4621  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4622 
4623  // If this function is vararg, store any remaining integer argument regs
4624  // to their spots on the stack so that they may be loaded by dereferencing
4625  // the result of va_next.
4626  for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4627  GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4628  Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4629  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4630  SDValue Store =
4631  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4632  MemOps.push_back(Store);
4633  // Increment the address by four for the next argument to store
4634  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4635  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4636  }
4637  }
4638 
4639  if (!MemOps.empty())
4640  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4641 
4642  return Chain;
4643 }
4644 
4645 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4646 /// adjusted to accommodate the arguments for the tailcall.
4647 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4648  unsigned ParamSize) {
4649 
4650  if (!isTailCall) return 0;
4651 
4653  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4654  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4655  // Remember only if the new adjustment is bigger.
4656  if (SPDiff < FI->getTailCallSPDelta())
4657  FI->setTailCallSPDelta(SPDiff);
4658 
4659  return SPDiff;
4660 }
4661 
4662 static bool isFunctionGlobalAddress(SDValue Callee);
4663 
// Conservatively determine whether Caller and the call target Callee are
// guaranteed to use the same TOC base, so that a call between them needs no
// TOC save/restore.  Returns false whenever sharing cannot be proven.
static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
                              const TargetMachine &TM) {
  // It does not make sense to call callsShareTOCBase() with a caller that
  // is PC Relative since PC Relative callers do not have a TOC.
#ifndef NDEBUG
  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
  assert(!STICaller->isUsingPCRelativeCalls() &&
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
#endif

  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
  // don't have enough information to determine if the caller and callee share
  // the same TOC base, so we have to pessimistically assume they don't for
  // correctness.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  const GlobalValue *GV = G->getGlobal();

  // If the callee is preemptable, then the static linker will use a plt-stub
  // which saves the toc to the stack, and needs a nop after the call
  // instruction to convert to a toc-restore.
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
  // We may need a TOC restore in the situation where the caller requires a
  // valid TOC but the callee is PC Relative and does not.
  const Function *F = dyn_cast<Function>(GV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);

  // If we have an Alias we can try to get the function from there.
  if (Alias) {
    const GlobalObject *GlobalObj = Alias->getAliaseeObject();
    F = dyn_cast<Function>(GlobalObj);
  }

  // If we still have no valid function pointer we do not have enough
  // information to determine if the callee uses PC Relative calls so we must
  // assume that it does.
  if (!F)
    return false;

  // If the callee uses PC Relative we cannot guarantee that the callee won't
  // clobber the TOC of the caller and so we must assume that the two
  // functions do not share a TOC base.
  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
  if (STICallee->isUsingPCRelativeCalls())
    return false;

  // If the GV is not a strong definition then we need to assume it can be
  // replaced by another function at link time. The function that replaces
  // it may not share the same TOC as the caller since the callee may be
  // replaced by a PC Relative version of the same function.
  if (!GV->isStrongDefinitionForLinker())
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return true;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  // NOTE(review): this inner 'F' is a fresh dyn_cast of GV itself (not the
  // alias-resolved 'F' above) and shadows it -- confirm against upstream
  // that checking the section prefix of GV, not the aliasee, is intended.
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  return true;
}
4742 
4743 static bool
4745  const SmallVectorImpl<ISD::OutputArg> &Outs) {
4746  assert(Subtarget.is64BitELFABI());
4747 
4748  const unsigned PtrByteSize = 8;
4749  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4750 
4751  static const MCPhysReg GPR[] = {
4752  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4753  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4754  };
4755  static const MCPhysReg VR[] = {
4756  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4757  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4758  };
4759 
4760  const unsigned NumGPRs = array_lengthof(GPR);
4761  const unsigned NumFPRs = 13;
4762  const unsigned NumVRs = array_lengthof(VR);
4763  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4764 
4765  unsigned NumBytes = LinkageSize;
4766  unsigned AvailableFPRs = NumFPRs;
4767  unsigned AvailableVRs = NumVRs;
4768 
4769  for (const ISD::OutputArg& Param : Outs) {
4770  if (Param.Flags.isNest()) continue;
4771 
4772  if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4773  LinkageSize, ParamAreaSize, NumBytes,
4774  AvailableFPRs, AvailableVRs))
4775  return true;
4776  }
4777  return false;
4778 }
4779 
4780 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4781  if (CB.arg_size() != CallerFn->arg_size())
4782  return false;
4783 
4784  auto CalleeArgIter = CB.arg_begin();
4785  auto CalleeArgEnd = CB.arg_end();
4786  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4787 
4788  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4789  const Value* CalleeArg = *CalleeArgIter;
4790  const Value* CallerArg = &(*CallerArgIter);
4791  if (CalleeArg == CallerArg)
4792  continue;
4793 
4794  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4795  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4796  // }
4797  // 1st argument of callee is undef and has the same type as caller.
4798  if (CalleeArg->getType() == CallerArg->getType() &&
4799  isa<UndefValue>(CalleeArg))
4800  continue;
4801 
4802  return false;
4803  }
4804 
4805  return true;
4806 }
4807 
4808 // Returns true if TCO is possible between the callers and callees
4809 // calling conventions.
4810 static bool
4812  CallingConv::ID CalleeCC) {
4813  // Tail calls are possible with fastcc and ccc.
4814  auto isTailCallableCC = [] (CallingConv::ID CC){
4815  return CC == CallingConv::C || CC == CallingConv::Fast;
4816  };
4817  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4818  return false;
4819 
4820  // We can safely tail call both fastcc and ccc callees from a c calling
4821  // convention caller. If the caller is fastcc, we may have less stack space
4822  // than a non-fastcc caller with the same signature so disable tail-calls in
4823  // that case.
4824  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4825 }
4826 
/// Decide whether a call is eligible for tail-call (TCO) or sibling-call
/// (SCO) optimization under the 64-bit SVR4/ELF ABIs.  Every check below
/// returns false conservatively when eligibility cannot be proven.
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // DisableSCO only disables opportunistic SCO; -tailcallopt still applies.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(&Caller, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
4910 
4911 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4912 /// for tail call optimization. Targets which want to do tail call
4913 /// optimization should implement this function.
4914 bool
4915 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4916  CallingConv::ID CalleeCC,
4917  bool isVarArg,
4919  SelectionDAG& DAG) const {
4920  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4921  return false;
4922 
4923  // Variable argument functions are not supported.
4924  if (isVarArg)
4925  return false;
4926 
4927  MachineFunction &MF = DAG.getMachineFunction();
4928  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4929  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4930  // Functions containing by val parameters are not supported.
4931  for (unsigned i = 0; i != Ins.size(); i++) {
4932  ISD::ArgFlagsTy Flags = Ins[i].Flags;
4933  if (Flags.isByVal()) return false;
4934  }
4935 
4936  // Non-PIC/GOT tail calls are supported.
4937  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4938  return true;
4939 
4940  // At the moment we can only do local tail calls (in same module, hidden
4941  // or protected) if we are generating PIC.
4942  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4943  return G->getGlobal()->hasHiddenVisibility()
4944  || G->getGlobal()->hasProtectedVisibility();
4945  }
4946 
4947  return false;
4948 }
4949 
4950 /// isCallCompatibleAddress - Return the immediate to use if the specified
4951 /// 32-bit value is representable in the immediate field of a BxA instruction.
4953  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4954  if (!C) return nullptr;
4955 
4956  int Addr = C->getZExtValue();
4957  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
4958  SignExtend32<26>(Addr) != Addr)
4959  return nullptr; // Top 6 bits have to be sext of immediate.
4960 
4961  return DAG
4962  .getConstant(
4963  (int)C->getZExtValue() >> 2, SDLoc(Op),
4965  .getNode();
4966 }
4967 
namespace {

/// Bookkeeping for one outgoing tail-call argument: the value to pass, the
/// FrameIndex node addressing its stack slot, and the raw frame index.
struct TailCallArgumentInfo {
  SDValue Arg;         // Argument value to be stored to the stack slot.
  SDValue FrameIdxOp;  // FrameIndex SDNode used as the store address.
  int FrameIdx = 0;    // Corresponding MachineFrameInfo index.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4979 
4980 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4982  SelectionDAG &DAG, SDValue Chain,
4983  const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4984  SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4985  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4986  SDValue Arg = TailCallArgs[i].Arg;
4987  SDValue FIN = TailCallArgs[i].FrameIdxOp;
4988  int FI = TailCallArgs[i].FrameIdx;
4989  // Store relative to framepointer.
4990  MemOpChains.push_back(DAG.getStore(
4991  Chain, dl, Arg, FIN,
4993  }
4994 }
4995 
4996 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4997 /// the appropriate stack slot for the tail call optimized function call.
4999  SDValue OldRetAddr, SDValue OldFP,
5000  int SPDiff, const SDLoc &dl) {
5001  if (SPDiff) {
5002  // Calculate the new stack slot for the return address.
5003  MachineFunction &MF = DAG.getMachineFunction();
5004  const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5005  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5006  bool isPPC64 = Subtarget.isPPC64();
5007  int SlotSize = isPPC64 ? 8 : 4;
5008  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5009  int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5010  NewRetAddrLoc, true);
5011  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5012  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5013  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5014  MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5015  }
5016  return Chain;
5017 }
5018 
5019 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5020 /// the position of the argument.
5021 static void
5023  SDValue Arg, int SPDiff, unsigned ArgOffset,
5024  SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5025  int Offset = ArgOffset + SPDiff;
5026  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5027  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5028  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5029  SDValue FIN = DAG.getFrameIndex(FI, VT);
5030  TailCallArgumentInfo Info;
5031  Info.Arg = Arg;
5032  Info.FrameIdxOp = FIN;
5033  Info.FrameIdx = FI;
5034  TailCallArguments.push_back(Info);
5035 }
5036 
5037 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5038 /// stack slot. Returns the chain as result and the loaded frame pointers in
5039 /// LROpOut/FPOpout. Used when tail calling.
5040 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5041  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5042  SDValue &FPOpOut, const SDLoc &dl) const {
5043  if (SPDiff) {
5044  // Load the LR and FP stack slot for later adjusting.
5045  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5046  LROpOut = getReturnAddrFrameIndex(DAG);
5047  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5048  Chain = SDValue(LROpOut.getNode(), 1);
5049  }
5050  return Chain;
5051 }
5052 
5053 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5054 /// by "Src" to address "Dst" of size "Size". Alignment information is
5055 /// specified by the specific parameter attribute. The copy will be passed as
5056 /// a byval function parameter.
5057 /// Sometimes what we are copying is the end of a larger object, the part that
5058 /// does not fit in registers.
5060  SDValue Chain, ISD::ArgFlagsTy Flags,
5061  SelectionDAG &DAG, const SDLoc &dl) {
5062  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5063  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5064  Flags.getNonZeroByValAlign(), false, false, false,
5066 }
5067 
5068 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5069 /// tail calls.
5070 static void LowerMemOpCallTo(
5071  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5072  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5073  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5074  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5075  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5076  if (!isTailCall) {
5077  if (isVector) {
5078  SDValue StackPtr;
5079  if (isPPC64)
5080  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5081  else
5082  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5083  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5084  DAG.getConstant(ArgOffset, dl, PtrVT));
5085  }
5086  MemOpChains.push_back(
5087  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5088  // Calculate and remember argument location.
5089  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5090  TailCallArguments);
5091 }
5092 
5093 static void
5095  const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5096  SDValue FPOp,
5097  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5098  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5099  // might overwrite each other in case of tail call optimization.
5100  SmallVector<SDValue, 8> MemOpChains2;
5101  // Do not flag preceding copytoreg stuff together with the following stuff.
5102  InFlag = SDValue();
5103  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5104  MemOpChains2, dl);
5105  if (!MemOpChains2.empty())
5106  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5107 
5108  // Store the return address to the appropriate stack slot.
5109  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5110 
5111  // Emit callseq_end just before tailcall node.
5112  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5113  DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5114  InFlag = Chain.getValue(1);
5115 }
5116 
5117 // Is this global address that of a function that can be called by name? (as
5118 // opposed to something that must hold a descriptor for an indirect call).
5119 static bool isFunctionGlobalAddress(SDValue Callee) {
5120  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5121  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5122  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5123  return false;
5124 
5125  return G->getGlobal()->getValueType()->isFunctionTy();
5126  }
5127 
5128  return false;
5129 }
5130 
5131 SDValue PPCTargetLowering::LowerCallResult(
5132  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5133  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5134  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5136  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5137  *DAG.getContext());
5138 
5139  CCRetInfo.AnalyzeCallResult(
5140  Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5141  ? RetCC_PPC_Cold
5142  : RetCC_PPC);
5143 
5144  // Copy all of the result registers out of their specified physreg.
5145  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5146  CCValAssign &VA = RVLocs[i];
5147  assert(VA.isRegLoc() && "Can only return in registers!");
5148 
5149  SDValue Val;
5150 
5151  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5152  SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5153  InFlag);
5154  Chain = Lo.getValue(1);
5155  InFlag = Lo.getValue(2);
5156  VA = RVLocs[++i]; // skip ahead to next loc
5157  SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5158  InFlag);
5159  Chain = Hi.getValue(1);
5160  InFlag = Hi.getValue(2);
5161  if (!Subtarget.isLittleEndian())
5162  std::swap (Lo, Hi);
5163  Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5164  } else {
5165  Val = DAG.getCopyFromReg(Chain, dl,
5166  VA.getLocReg(), VA.getLocVT(), InFlag);
5167  Chain = Val.getValue(1);
5168  InFlag = Val.getValue(2);
5169  }
5170 
5171  switch (VA.getLocInfo()) {
5172  default: llvm_unreachable("Unknown loc info!");
5173  case CCValAssign::Full: break;
5174  case CCValAssign::AExt:
5175  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5176  break;
5177  case CCValAssign::ZExt:
5178  Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5179  DAG.getValueType(VA.getValVT()));
5180  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5181  break;
5182  case CCValAssign::SExt:
5183  Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5184  DAG.getValueType(VA.getValVT()));
5185  Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5186  break;
5187  }
5188 
5189  InVals.push_back(Val);
5190  }
5191 
5192  return Chain;
5193 }
5194 
5195 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5196  const PPCSubtarget &Subtarget, bool isPatchPoint) {
5197  // PatchPoint calls are not indirect.
5198  if (isPatchPoint)
5199  return false;
5200 
5201  if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
5202  return false;
5203 
5204  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5205  // becuase the immediate function pointer points to a descriptor instead of
5206  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5207  // pointer immediate points to the global entry point, while the BLA would
5208  // need to jump to the local entry point (see rL211174).
5209  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5211  return false;
5212 
5213  return true;
5214 }
5215 
5216 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5217 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5218  return Subtarget.isAIXABI() ||
5219  (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5220 }
5221 
5223  const Function &Caller, const SDValue &Callee,
5224  const PPCSubtarget &Subtarget,
5225  const TargetMachine &TM,
5226  bool IsStrictFPCall = false) {
5227  if (CFlags.IsTailCall)
5228  return PPCISD::TC_RETURN;
5229 
5230  unsigned RetOpc = 0;
5231  // This is a call through a function pointer.
5232  if (CFlags.IsIndirect) {
5233  // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5234  // indirect calls. The save of the caller's TOC pointer to the stack will be
5235  // inserted into the DAG as part of call lowering. The restore of the TOC
5236  // pointer is modeled by using a pseudo instruction for the call opcode that
5237  // represents the 2 instruction sequence of an indirect branch and link,
5238  // immediately followed by a load of the TOC pointer from the the stack save
5239  // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5240  // as it is not saved or used.
5241  RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5242  : PPCISD::BCTRL;
5243  } else if (Subtarget.isUsingPCRelativeCalls()) {
5244  assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5245  RetOpc = PPCISD::CALL_NOTOC;
5246  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5247  // The ABIs that maintain a TOC pointer accross calls need to have a nop
5248  // immediately following the call instruction if the caller and callee may
5249  // have different TOC bases. At link time if the linker determines the calls
5250  // may not share a TOC base, the call is redirected to a trampoline inserted
5251  // by the linker. The trampoline will (among other things) save the callers
5252  // TOC pointer at an ABI designated offset in the linkage area and the
5253  // linker will rewrite the nop to be a load of the TOC pointer from the
5254  // linkage area into gpr2.
5255  RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5256  : PPCISD::CALL_NOP;
5257  else
5258  RetOpc = PPCISD::CALL;
5259  if (IsStrictFPCall) {
5260  switch (RetOpc) {
5261  default:
5262  llvm_unreachable("Unknown call opcode");
5264  RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5265  break;
5266  case PPCISD::BCTRL:
5267  RetOpc = PPCISD::BCTRL_RM;
5268  break;
5269  case PPCISD::CALL_NOTOC:
5270  RetOpc = PPCISD::CALL_NOTOC_RM;
5271  break;
5272  case PPCISD::CALL:
5273  RetOpc = PPCISD::CALL_RM;
5274  break;
5275  case PPCISD::CALL_NOP:
5276  RetOpc = PPCISD::CALL_NOP_RM;
5277  break;
5278  }
5279  }
5280  return RetOpc;
5281 }
5282 
5283 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5284  const SDLoc &dl, const PPCSubtarget &Subtarget) {
5285  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5286  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5287  return SDValue(Dest, 0);
5288 
5289  // Returns true if the callee is local, and false otherwise.
5290  auto isLocalCallee = [&]() {
5291  const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5292  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5293  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5294 
5295  return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5296  !isa_and_nonnull<GlobalIFunc>(GV);
5297  };
5298 
5299  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5300  // a static relocation model causes some versions of GNU LD (2.17.50, at
5301  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5302  // built with secure-PLT.
5303  bool UsePlt =
5304  Subtarget.is32BitELFABI() && !isLocalCallee() &&
5306 
5307  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5308  const TargetMachine &TM = Subtarget.getTargetMachine();
5309  const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5310  MCSymbolXCOFF *S =
5311  cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5312 
5313  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5314  return DAG.getMCSymbol(S, PtrVT);
5315  };
5316 
5318  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5319 
5320  if (Subtarget.isAIXABI()) {
5321  assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5322  return getAIXFuncEntryPointSymbolSDNode(GV);
5323  }
5324  return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5325  UsePlt ? PPCII::MO_PLT : 0);
5326  }
5327 
5328  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5329  const char *SymName = S->getSymbol();
5330  if (Subtarget.isAIXABI()) {
5331  // If there exists a user-declared function whose name is the same as the
5332  // ExternalSymbol's, then we pick up the user-declared version.
5333  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5334  if (const Function *F =
5335  dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5336  return getAIXFuncEntryPointSymbolSDNode(F);
5337 
5338  // On AIX, direct function calls reference the symbol for the function's
5339  // entry point, which is named by prepending a "." before the function's
5340  // C-linkage name. A Qualname is returned here because an external
5341  // function entry point is a csect with XTY_ER property.
5342  const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5343  auto &Context = DAG.getMachineFunction().getMMI().getContext();
5344  MCSectionXCOFF *Sec = Context.getXCOFFSection(
5345  (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5347  return Sec->getQualNameSymbol();
5348  };
5349 
5350  SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5351  }
5352  return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5353  UsePlt ? PPCII::MO_PLT : 0);
5354  }
5355 
5356  // No transformation needed.
5357  assert(Callee.getNode() && "What no callee?");
5358  return Callee;
5359 }
5360 
5362  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5363  "Expected a CALLSEQ_STARTSDNode.");
5364 
5365  // The last operand is the chain, except when the node has glue. If the node
5366  // has glue, then the last operand is the glue, and the chain is the second
5367  // last operand.
5368  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5369  if (LastValue.getValueType() != MVT::Glue)
5370  return LastValue;
5371 
5372  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5373 }
5374 
5375 // Creates the node that moves a functions address into the count register
5376 // to prepare for an indirect call instruction.
5377 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5378  SDValue &Glue, SDValue &Chain,
5379  const SDLoc &dl) {
5380  SDValue MTCTROps[] = {Chain, Callee, Glue};
5381  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5382  Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5383  makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5384  // The glue is the second value produced.
5385  Glue = Chain.getValue(1);
5386 }
5387 
5389  SDValue &Glue, SDValue &Chain,
5390  SDValue CallSeqStart,
5391  const CallBase *CB, const SDLoc &dl,
5392  bool hasNest,
5393  const PPCSubtarget &Subtarget) {
5394  // Function pointers in the 64-bit SVR4 ABI do not point to the function
5395  // entry point, but to the function descriptor (the function entry point
5396  // address is part of the function descriptor though).
5397  // The function descriptor is a three doubleword structure with the
5398  // following fields: function entry point, TOC base address and
5399  // environment pointer.
5400  // Thus for a call through a function pointer, the following actions need
5401  // to be performed:
5402  // 1. Save the TOC of the caller in the TOC save area of its stack
5403  // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5404  // 2. Load the address of the function entry point from the function
5405  // descriptor.
5406  // 3. Load the TOC of the callee from the function descriptor into r2.
5407  // 4. Load the environment pointer from the function descriptor into
5408  // r11.
5409  // 5. Branch to the function entry point address.
5410  // 6. On return of the callee, the TOC of the caller needs to be
5411  // restored (this is done in FinishCall()).
5412  //
5413  // The loads are scheduled at the beginning of the call sequence, and the
5414  // register copies are flagged together to ensure that no other
5415  // operations can be scheduled in between. E.g. without flagging the
5416  // copies together, a TOC access in the caller could be scheduled between
5417  // the assignment of the callee TOC and the branch to the callee, which leads
5418  // to incorrect code.
5419 
5420  // Start by loading the function address from the descriptor.
5421  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5422  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5426 
5427  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5428 
5429  // Registers used in building the DAG.
5430  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5431  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5432 
5433  // Offsets of descriptor members.
5434  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5435  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5436 
5437  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5438  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5439 
5440  // One load for the functions entry point address.
5441  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5442  Alignment, MMOFlags);
5443 
5444  // One for loading the TOC anchor for the module that contains the called
5445  // function.
5446  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5447  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5448  SDValue TOCPtr =
5449  DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5450  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5451 
5452  // One for loading the environment pointer.
5453  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5454  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5455  SDValue LoadEnvPtr =
5456  DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5457  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5458 
5459 
5460  // Then copy the newly loaded TOC anchor to the TOC pointer.
5461  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5462  Chain = TOCVal.getValue(0);
5463  Glue = TOCVal.getValue(1);
5464 
5465  // If the function call has an explicit 'nest' parameter, it takes the
5466  // place of the environment pointer.
5467  assert((!hasNest || !Subtarget.isAIXABI()) &&
5468  "Nest parameter is not supported on AIX.");
5469  if (!hasNest) {
5470  SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5471  Chain = EnvVal.getValue(0);
5472  Glue = EnvVal.getValue(1);
5473  }
5474 
5475  // The rest of the indirect call sequence is the same as the non-descriptor
5476  // DAG.
5477  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5478 }
5479 
5480 static void
5482  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5483  SelectionDAG &DAG,
5484  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5485  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5486  const PPCSubtarget &Subtarget) {
5487  const bool IsPPC64 = Subtarget.isPPC64();
5488  // MVT for a general purpose register.
5489  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5490 
5491  // First operand is always the chain.
5492  Ops.push_back(Chain);
5493 
5494  // If it's a direct call pass the callee as the second operand.
5495  if (!CFlags.IsIndirect)
5496  Ops.push_back(Callee);
5497  else {
5498  assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5499 
5500  // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5501  // on the stack (this would have been done in `LowerCall_64SVR4` or
5502  // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5503  // represents both the indirect branch and a load that restores the TOC
5504  // pointer from the linkage area. The operand for the TOC restore is an add
5505  // of the TOC save offset to the stack pointer. This must be the second
5506  // operand: after the chain input but before any other variadic arguments.
5507  // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5508  // saved or used.
5509  if (isTOCSaveRestoreRequired(Subtarget)) {
5510  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5511 
5512  SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5513  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5514  SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5515  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5516  Ops.push_back(AddTOC);
5517  }
5518 
5519  // Add the register used for the environment pointer.
5520  if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5521  Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5522  RegVT));
5523 
5524 
5525  // Add CTR register as callee so a bctr can be emitted later.
5526  if (CFlags.IsTailCall)
5527  Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5528  }
5529 
5530  // If this is a tail call add stack pointer delta.
5531  if (CFlags.IsTailCall)
5532  Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5533 
5534  // Add argument registers to the end of the list so that they are known live
5535  // into the call.
5536  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5537  Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5538  RegsToPass[i].second.getValueType()));
5539 
5540  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5541  // no way to mark dependencies as implicit here.
5542  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5543  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5544  !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5545  Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5546 
5547  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5548  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5549  Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5550 
5551  // Add a register mask operand representing the call-preserved registers.
5552  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5553  const uint32_t *Mask =
5555  assert(Mask && "Missing call preserved mask for calling convention");
5556  Ops.push_back(DAG.getRegisterMask(Mask));
5557 
5558  // If the glue is valid, it is the last operand.
5559  if (Glue.getNode())
5560  Ops.push_back(Glue);
5561 }
5562 
5563 SDValue PPCTargetLowering::FinishCall(
5564  CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5565  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5566  SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5567  unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5568  SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5569 
5570  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5571  Subtarget.isAIXABI())
5572  setUsesTOCBasePtr(DAG);
5573 
5574  unsigned CallOpc =
5575  getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5576  Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5577 
5578  if (!CFlags.IsIndirect)
5579  Callee = transformCallee(Callee, DAG, dl, Subtarget);
5580  else if (Subtarget.usesFunctionDescriptors())
5581  prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5582  dl, CFlags.HasNest, Subtarget);
5583  else
5584  prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5585 
5586  // Build the operand list for the call instruction.
5588  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5589  SPDiff, Subtarget);
5590 
5591  // Emit tail call.
5592  if (CFlags.IsTailCall) {
5593  // Indirect tail call when using PC Relative calls do not have the same
5594  // constraints.
5595  assert(((Callee.getOpcode() == ISD::Register &&
5596  cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5597  Callee.getOpcode() == ISD::TargetExternalSymbol ||
5598  Callee.getOpcode() == ISD::TargetGlobalAddress ||
5599  isa<ConstantSDNode>(Callee) ||
5600  (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5601  "Expecting a global address, external symbol, absolute value, "
5602  "register or an indirect tail call when PC Relative calls are "
5603  "used.");
5604  // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5605  assert(CallOpc == PPCISD::TC_RETURN &&
5606  "Unexpected call opcode for a tail call.");
5608  return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5609  }
5610 
5611  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5612  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5613  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5614  Glue = Chain.getValue(1);
5615 
5616  // When performing tail call optimization the callee pops its arguments off
5617  // the stack. Account for this here so these bytes can be pushed back on in
5618  // PPCFrameLowering::eliminateCallFramePseudoInstr.
5619  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5621  ? NumBytes
5622  : 0;
5623 
5624  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5625  DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5626  Glue, dl);
5627  Glue = Chain.getValue(1);
5628 
5629  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5630  DAG, InVals);
5631 }
5632 
5633 SDValue
5634 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5635  SmallVectorImpl<SDValue> &InVals) const {
5636  SelectionDAG &DAG = CLI.DAG;
5637  SDLoc &dl = CLI.DL;
5639  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5641  SDValue Chain = CLI.Chain;
5642  SDValue Callee = CLI.Callee;
5643  bool &isTailCall = CLI.IsTailCall;
5644  CallingConv::ID CallConv = CLI.CallConv;
5645  bool isVarArg = CLI.IsVarArg;
5646  bool isPatchPoint = CLI.IsPatchPoint;
5647  const CallBase *CB = CLI.CB;
5648 
5649  if (isTailCall) {
5650  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5651  isTailCall = false;
5652  else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5653  isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5654  Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5655  else
5656  isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5657  Ins, DAG);
5658  if (isTailCall) {
5659  ++NumTailCalls;
5660  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5661  ++NumSiblingCalls;
5662 
5663  // PC Relative calls no longer guarantee that the callee is a Global
5664  // Address Node. The callee could be an indirect tail call in which
5665  // case the SDValue for the callee could be a load (to load the address
5666  // of a function pointer) or it may be a register copy (to move the
5667  // address of the callee from a function parameter into a virtual
5668  // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5669  assert((Subtarget.isUsingPCRelativeCalls() ||
5670  isa<GlobalAddressSDNode>(Callee)) &&
5671  "Callee should be an llvm::Function object.");
5672 
5673  LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5674  << "\nTCO callee: ");
5675  LLVM_DEBUG(Callee.dump());
5676  }
5677  }
5678 
5679  if (!isTailCall && CB && CB->isMustTailCall())
5680  report_fatal_error("failed to perform tail call elimination on a call "
5681  "site marked musttail");
5682 
5683  // When long calls (i.e. indirect calls) are always used, calls are always
5684  // made via function pointer. If we have a function name, first translate it
5685  // into a pointer.
5686  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5687  !isTailCall)
5688  Callee = LowerGlobalAddress(Callee, DAG);
5689 
5690  CallFlags CFlags(
5691  CallConv, isTailCall, isVarArg, isPatchPoint,
5692  isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5693  // hasNest
5694  Subtarget.is64BitELFABI() &&
5695  any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5696  CLI.NoMerge);
5697 
5698  if (Subtarget.isAIXABI())
5699  return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5700  InVals, CB);
5701 
5702  assert(Subtarget.isSVR4ABI());
5703  if (Subtarget.isPPC64())
5704  return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5705  InVals, CB);
5706  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5707  InVals, CB);
5708 }
5709 
5710 SDValue PPCTargetLowering::LowerCall_32SVR4(
5711  SDValue Chain, SDValue Callee, CallFlags CFlags,
5712  const SmallVectorImpl<ISD::OutputArg> &Outs,
5713  const SmallVectorImpl<SDValue> &OutVals,
5714  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5715  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5716  const CallBase *CB) const {
5717  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5718  // of the 32-bit SVR4 ABI stack frame layout.
5719 
5720  const CallingConv::ID CallConv = CFlags.CallConv;
5721  const bool IsVarArg = CFlags.IsVarArg;
5722  const bool IsTailCall = CFlags.IsTailCall;
5723 
5724  assert((CallConv == CallingConv::C ||
5725  CallConv == CallingConv::Cold ||
5726  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5727 
5728  const Align PtrAlign(4);
5729 
5730  MachineFunction &MF = DAG.getMachineFunction();
5731 
5732  // Mark this function as potentially containing a function that contains a
5733  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5734  // and restoring the callers stack pointer in this functions epilog. This is
5735  // done because by tail calling the called function might overwrite the value
5736  // in this function's (MF) stack pointer stack slot 0(SP).
5737  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5738  CallConv == CallingConv::Fast)
5739  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5740 
5741  // Count how many bytes are to be pushed on the stack, including the linkage
5742  // area, parameter list area and the part of the local variable space which
5743  // contains copies of aggregates which are passed by value.
5744 
5745  // Assign locations to all of the outgoing arguments.
5747  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5748 
5749  // Reserve space for the linkage area on the stack.
5750  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5751  PtrAlign);
5752  if (useSoftFloat())
5753  CCInfo.PreAnalyzeCallOperands(Outs);
5754 
5755  if (IsVarArg) {
5756  // Handle fixed and variable vector arguments differently.
5757  // Fixed vector arguments go into registers as long as registers are
5758  // available. Variable vector arguments always go into memory.
5759  unsigned NumArgs = Outs.size();
5760 
5761  for (unsigned i = 0; i != NumArgs; ++i) {
5762  MVT ArgVT = Outs[i].VT;
5763  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5764  bool Result;
5765 
5766  if (Outs[i].IsFixed) {
5767  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5768  CCInfo);
5769  } else {
5771  ArgFlags, CCInfo);
5772  }
5773 
5774  if (Result) {
5775 #ifndef NDEBUG
5776  errs() << "Call operand #" << i << " has unhandled type "
5777  << EVT(ArgVT).getEVTString() << "\n";
5778 #endif
5779  llvm_unreachable(nullptr);
5780  }
5781  }
5782  } else {
5783  // All arguments are treated the same.
5784  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5785  }
5786  CCInfo.clearWasPPCF128();
5787 
5788  // Assign locations to all of the outgoing aggregate by value arguments.
5789  SmallVector<CCValAssign, 16> ByValArgLocs;
5790  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5791 
5792  // Reserve stack space for the allocations in CCInfo.
5793  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5794 
5795  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5796 
5797  // Size of the linkage area, parameter list area and the part of the local
5798  // space variable where copies of aggregates which are passed by value are
5799  // stored.
5800  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5801 
5802  // Calculate by how many bytes the stack has to be adjusted in case of tail
5803  // call optimization.
5804  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5805 
5806  // Adjust the stack pointer for the new arguments...
5807  // These operations are automatically eliminated by the prolog/epilog pass
5808  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5809  SDValue CallSeqStart = Chain;
5810 
5811  // Load the return address and frame pointer so it can be moved somewhere else
5812  // later.
5813  SDValue LROp, FPOp;
5814  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5815 
5816  // Set up a copy of the stack pointer for use loading and storing any
5817  // arguments that may not fit in the registers available for argument
5818  // passing.
5819  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5820 
5822  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5823  SmallVector<SDValue, 8> MemOpChains;
5824 
5825  bool seenFloatArg = false;
5826  // Walk the register/memloc assignments, inserting copies/loads.
5827  // i - Tracks the index into the list of registers allocated for the call
5828  // RealArgIdx - Tracks the index into the list of actual function arguments
5829  // j - Tracks the index into the list of byval arguments
5830  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5831  i != e;
5832  ++i, ++RealArgIdx) {
5833  CCValAssign &VA = ArgLocs[i];
5834  SDValue Arg = OutVals[RealArgIdx];
5835  ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5836 
5837  if (Flags.isByVal()) {
5838  // Argument is an aggregate which is passed by value, thus we need to
5839  // create a copy of it in the local variable space of the current stack
5840  // frame (which is the stack frame of the caller) and pass the address of
5841  // this copy to the callee.
5842  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5843  CCValAssign &ByValVA = ByValArgLocs[j++];
5844  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5845 
5846  // Memory reserved in the local variable space of the callers stack frame.
5847  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5848 
5849  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5850  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5851  StackPtr, PtrOff);
5852 
5853  // Create a copy of the argument in the local area of the current
5854  // stack frame.
5855  SDValue MemcpyCall =
5857  CallSeqStart.getNode()->getOperand(0),
5858  Flags, DAG, dl);
5859 
5860  // This must go outside the CALLSEQ_START..END.
5861  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5862  SDLoc(MemcpyCall));
5863  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5864  NewCallSeqStart.getNode());
5865  Chain = CallSeqStart = NewCallSeqStart;
5866 
5867  // Pass the address of the aggregate copy on the stack either in a
5868  // physical register or in the parameter list area of the current stack
5869  // frame to the callee.
5870  Arg = PtrOff;
5871  }
5872 
5873  // When useCRBits() is true, there can be i1 arguments.
5874  // It is because getRegisterType(MVT::i1) => MVT::i1,
5875  // and for other integer types getRegisterType() => MVT::i32.
5876  // Extend i1 and ensure callee will get i32.
5877  if (Arg.getValueType() == MVT::i1)
5879  dl, MVT::i32, Arg);
5880 
5881  if (VA.isRegLoc()) {
5882  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5883  // Put argument in a physical register.
5884  if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5885  bool IsLE = Subtarget.isLittleEndian();
5886  SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5887  DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5888  RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5889  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5890  DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5891  RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5892  SVal.getValue(0)));
5893  } else
5894  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5895  } else {
5896  // Put argument in the parameter list area of the current stack frame.
5897  assert(VA.isMemLoc());
5898  unsigned LocMemOffset = VA.getLocMemOffset();
5899 
5900  if (!IsTailCall) {
5901  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5902  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5903  StackPtr, PtrOff);
5904 
5905  MemOpChains.push_back(
5906  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5907  } else {
5908  // Calculate and remember argument location.
5909  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5910  TailCallArguments);
5911  }
5912  }
5913  }
5914 
5915  if (!MemOpChains.empty())
5916  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5917 
5918  // Build a sequence of copy-to-reg nodes chained together with token chain
5919  // and flag operands which copy the outgoing args into the appropriate regs.
5920  SDValue InFlag;
5921  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5922  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5923  RegsToPass[i].second, InFlag);
5924  InFlag = Chain.getValue(1);
5925  }
5926 
5927  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5928  // registers.
5929  if (IsVarArg) {
5930  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5931  SDValue Ops[] = { Chain, InFlag };
5932 
5933  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5934  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5935 
5936  InFlag = Chain.getValue(1);
5937  }
5938 
5939  if (IsTailCall)
5940  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5941  TailCallArguments);
5942 
5943  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5944  Callee, SPDiff, NumBytes, Ins, InVals, CB);
5945 }
5946 
5947 // Copy an argument into memory, being careful to do this outside the
5948 // call sequence for the call to which the argument belongs.
5949 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5950  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5951  SelectionDAG &DAG, const SDLoc &dl) const {
5952  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5953  CallSeqStart.getNode()->getOperand(0),
5954  Flags, DAG, dl);
5955  // The MEMCPY must go outside the CALLSEQ_START..END.
5956  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5957  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5958  SDLoc(MemcpyCall));
5959  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5960  NewCallSeqStart.getNode());
5961  return NewCallSeqStart;
5962 }
5963 
5964 SDValue PPCTargetLowering::LowerCall_64SVR4(
5965  SDValue Chain, SDValue Callee, CallFlags CFlags,
5966  const SmallVectorImpl<ISD::OutputArg> &Outs,
5967  const SmallVectorImpl<SDValue> &OutVals,
5968  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5969  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5970  const CallBase *CB) const {
5971  bool isELFv2ABI = Subtarget.isELFv2ABI();
5972  bool isLittleEndian = Subtarget.isLittleEndian();
5973  unsigned NumOps = Outs.size();
5974  bool IsSibCall = false;
5975  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5976 
5977  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5978  unsigned PtrByteSize = 8;
5979 
5980  MachineFunction &MF = DAG.getMachineFunction();
5981 
5982  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5983  IsSibCall = true;
5984 
5985  // Mark this function as potentially containing a function that contains a
5986  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5987  // and restoring the callers stack pointer in this functions epilog. This is
5988  // done because by tail calling the called function might overwrite the value
5989  // in this function's (MF) stack pointer stack slot 0(SP).
5990  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5991  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5992 
5993  assert(!(IsFastCall && CFlags.IsVarArg) &&
5994  "fastcc not supported on varargs functions");
5995 
5996  // Count how many bytes are to be pushed on the stack, including the linkage
5997  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5998  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5999  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6000  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6001  unsigned NumBytes = LinkageSize;
6002  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6003 
6004  static const MCPhysReg GPR[] = {
6005  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6006  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6007  };
6008  static const MCPhysReg VR[] = {
6009  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6010  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6011  };
6012 
6013  const unsigned NumGPRs = array_lengthof(GPR);
6014  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6015  const unsigned NumVRs = array_lengthof(VR);
6016 
6017  // On ELFv2, we can avoid allocating the parameter area if all the arguments
6018  // can be passed to the callee in registers.
6019  // For the fast calling convention, there is another check below.
6020  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6021  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6022  if (!HasParameterArea) {
6023  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6024  unsigned AvailableFPRs = NumFPRs;
6025  unsigned AvailableVRs = NumVRs;
6026  unsigned NumBytesTmp = NumBytes;
6027  for (unsigned i = 0; i != NumOps; ++i) {
6028  if (Outs[i].Flags.isNest()) continue;
6029  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6030  PtrByteSize, LinkageSize, ParamAreaSize,
6031  NumBytesTmp, AvailableFPRs, AvailableVRs))
6032  HasParameterArea = true;
6033  }
6034  }
6035 
6036  // When using the fast calling convention, we don't provide backing for
6037  // arguments that will be in registers.
6038  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6039 
6040  // Avoid allocating parameter area for fastcc functions if all the arguments
6041  // can be passed in the registers.
6042  if (IsFastCall)
6043  HasParameterArea = false;
6044 
6045  // Add up all the space actually used.
6046  for (unsigned i = 0; i != NumOps; ++i) {
6047  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6048  EVT ArgVT = Outs[i].VT;
6049  EVT OrigVT = Outs[i].ArgVT;
6050 
6051  if (Flags.isNest())
6052  continue;
6053 
6054  if (IsFastCall) {
6055  if (Flags.isByVal()) {
6056  NumGPRsUsed += (Flags.getByValSize()+7)/8;
6057  if (NumGPRsUsed > NumGPRs)
6058  HasParameterArea = true;
6059  } else {
6060  switch (ArgVT.getSimpleVT().SimpleTy) {
6061  default: llvm_unreachable("Unexpected ValueType for argument!");
6062  case MVT::i1:
6063  case MVT::i32:
6064  case MVT::i64:
6065  if (++NumGPRsUsed <= NumGPRs)
6066  continue;
6067  break;
6068  case MVT::v4i32:
6069  case MVT::v8i16:
6070  case MVT::v16i8:
6071  case MVT::v2f64:
6072  case MVT::v2i64:
6073  case MVT::v1i128:
6074  case MVT::f128:
6075  if (++NumVRsUsed <= NumVRs)
6076  continue;
6077  break;
6078  case MVT::v4f32:
6079  if (++NumVRsUsed <= NumVRs)
6080  continue;
6081  break;
6082  case MVT::f32:
6083  case MVT::f64:
6084  if (++NumFPRsUsed <= NumFPRs)
6085  continue;
6086  break;
6087  }
6088  HasParameterArea = true;
6089  }
6090  }
6091 
6092  /* Respect alignment of argument on the stack. */
6093  auto Alignement =
6094  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6095  NumBytes = alignTo(NumBytes, Alignement);
6096 
6097  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6098  if (Flags.isInConsecutiveRegsLast())
6099  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6100  }
6101 
6102  unsigned NumBytesActuallyUsed = NumBytes;
6103 
6104  // In the old ELFv1 ABI,
6105  // the prolog code of the callee may store up to 8 GPR argument registers to
6106  // the stack, allowing va_start to index over them in memory if its varargs.
6107  // Because we cannot tell if this is needed on the caller side, we have to
6108  // conservatively assume that it is needed. As such, make sure we have at
6109  // least enough stack space for the caller to store the 8 GPRs.
6110  // In the ELFv2 ABI, we allocate the parameter area iff a callee
6111  // really requires memory operands, e.g. a vararg function.
6112  if (HasParameterArea)
6113  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6114  else
6115  NumBytes = LinkageSize;
6116 
6117  // Tail call needs the stack to be aligned.
6118  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6119  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6120 
6121  int SPDiff = 0;
6122 
6123  // Calculate by how many bytes the stack has to be adjusted in case of tail
6124  // call optimization.
6125  if (!IsSibCall)
6126  SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6127 
6128  // To protect arguments on the stack from being clobbered in a tail call,
6129  // force all the loads to happen before doing any other lowering.
6130  if (CFlags.IsTailCall)
6131  Chain = DAG.getStackArgumentTokenFactor(Chain);
6132 
6133  // Adjust the stack pointer for the new arguments...
6134  // These operations are automatically eliminated by the prolog/epilog pass
6135  if (!IsSibCall)
6136  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6137  SDValue CallSeqStart = Chain;
6138 
6139  // Load the return address and frame pointer so it can be move somewhere else
6140  // later.
6141  SDValue LROp, FPOp;
6142  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6143 
6144  // Set up a copy of the stack pointer for use loading and storing any
6145  // arguments that may not fit in the registers available for argument
6146  // passing.
6147  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6148 
6149  // Figure out which arguments are going to go in registers, and which in
6150  // memory. Also, if this is a vararg function, floating point operations
6151  // must be stored to our stack, and loaded into integer regs as well, if
6152  // any integer regs are available for argument passing.
6153  unsigned ArgOffset = LinkageSize;
6154 
6156  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6157 
6158  SmallVector<SDValue, 8> MemOpChains;
6159  for (unsigned i = 0; i != NumOps; ++i) {
6160  SDValue Arg = OutVals[i];
6161  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6162  EVT ArgVT = Outs[i].VT;
6163  EVT OrigVT = Outs[i].ArgVT;
6164 
6165  // PtrOff will be used to store the current argument to the stack if a
6166  // register cannot be found for it.
6167  SDValue PtrOff;
6168 
6169  // We re-align the argument offset for each argument, except when using the
6170  // fast calling convention, when we need to make sure we do that only when
6171  // we'll actually use a stack slot.
6172  auto ComputePtrOff = [&]() {
6173  /* Respect alignment of argument on the stack. */
6174  auto Alignment =
6175  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6176  ArgOffset = alignTo(ArgOffset, Alignment);
6177 
6178  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6179 
6180  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6181  };
6182 
6183  if (!IsFastCall) {
6184  ComputePtrOff();
6185 
6186  /* Compute GPR index associated with argument offset. */
6187  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6188  GPR_idx = std::min(GPR_idx, NumGPRs);
6189  }
6190 
6191  // Promote integers to 64-bit values.
6192  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6193  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6194  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6195  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6196  }
6197 
6198  // FIXME memcpy is used way more than necessary. Correctness first.
6199  // Note: "by value" is code for passing a structure by value, not
6200  // basic types.
6201  if (Flags.isByVal()) {
6202  // Note: Size includes alignment padding, so
6203  // struct x { short a; char b; }
6204  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6205  // These are the proper values we need for right-justifying the
6206  // aggregate in a parameter register.
6207  unsigned Size = Flags.getByValSize();
6208 
6209  // An empty aggregate parameter takes up no storage and no
6210  // registers.
6211  if (Size == 0)
6212  continue;
6213 
6214  if (IsFastCall)
6215  ComputePtrOff();
6216 
6217  // All aggregates smaller than 8 bytes must be passed right-justified.
6218  if (Size==1 || Size==2 || Size==4) {
6219  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6220  if (GPR_idx != NumGPRs) {
6221  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6222  MachinePointerInfo(), VT);
6223  MemOpChains.push_back(Load.getValue(1));
6224  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6225 
6226  ArgOffset += PtrByteSize;
6227  continue;
6228  }
6229  }
6230 
6231  if (GPR_idx == NumGPRs && Size < 8) {
6232  SDValue AddPtr = PtrOff;
6233  if (!isLittleEndian) {
6234  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6235  PtrOff.getValueType());
6236  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6237  }
6238  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6239  CallSeqStart,
6240  Flags, DAG, dl);
6241  ArgOffset += PtrByteSize;
6242  continue;
6243  }
6244  // Copy the object to parameter save area if it can not be entirely passed
6245  // by registers.
6246  // FIXME: we only need to copy the parts which need to be passed in
6247  // parameter save area. For the parts passed by registers, we don't need
6248  // to copy them to the stack although we need to allocate space for them
6249  // in parameter save area.
6250  if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6251  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6252  CallSeqStart,
6253  Flags, DAG, dl);
6254 
6255  // When a register is available, pass a small aggregate right-justified.
6256  if (Size < 8 && GPR_idx != NumGPRs) {
6257  // The easiest way to get this right-justified in a register
6258  // is to copy the structure into the rightmost portion of a
6259  // local variable slot, then load the whole slot into the
6260  // register.
6261  // FIXME: The memcpy seems to produce pretty awful code for
6262  // small aggregates, particularly for packed ones.
6263  // FIXME: It would be preferable to use the slot in the
6264  // parameter save area instead of a new local variable.
6265  SDValue AddPtr = PtrOff;
6266  if (!isLittleEndian) {
6267  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6268  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6269  }
6270  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6271  CallSeqStart,
6272  Flags, DAG, dl);
6273 
6274  // Load the slot into the register.
6275  SDValue Load =
6276  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6277  MemOpChains.push_back(Load.getValue(1));
6278  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6279 
6280  // Done with this argument.
6281  ArgOffset += PtrByteSize;
6282  continue;
6283  }
6284 
6285  // For aggregates larger than PtrByteSize, copy the pieces of the
6286  // object that fit into registers from the parameter save area.
6287  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6288  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6289  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6290  if (GPR_idx != NumGPRs) {
6291  unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6292  EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6293  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6294  MachinePointerInfo(), ObjType);
6295 
6296  MemOpChains.push_back(Load.getValue(1));
6297  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6298  ArgOffset += PtrByteSize;
6299  } else {
6300  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6301  break;
6302  }
6303  }
6304  continue;
6305  }
6306 
6307  switch (Arg.getSimpleValueType().SimpleTy) {
6308  default: llvm_unreachable("Unexpected ValueType for argument!");
6309  case MVT::i1:
6310  case MVT::i32:
6311  case MVT::i64:
6312  if (Flags.isNest()) {
6313  // The 'nest' parameter, if any, is passed in R11.
6314  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6315  break;
6316  }
6317 
6318  // These can be scalar arguments or elements of an integer array type
6319  // passed directly. Clang may use those instead of "byval" aggregate
6320  // types to avoid forcing arguments to memory unnecessarily.
6321  if (GPR_idx != NumGPRs) {
6322  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6323  } else {
6324  if (IsFastCall)
6325  ComputePtrOff();
6326 
6327  assert(HasParameterArea &&
6328  "Parameter area must exist to pass an argument in memory.");
6329  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6330  true, CFlags.IsTailCall, false, MemOpChains,
6331  TailCallArguments, dl);
6332  if (IsFastCall)
6333  ArgOffset += PtrByteSize;
6334  }
6335  if (!IsFastCall)
6336  ArgOffset += PtrByteSize;
6337  break;
6338  case MVT::f32:
6339  case MVT::f64: {
6340  // These can be scalar arguments or elements of a float array type
6341  // passed directly. The latter are used to implement ELFv2 homogenous
6342  // float aggregates.
6343 
6344  // Named arguments go into FPRs first, and once they overflow, the
6345  // remaining arguments go into GPRs and then the parameter save area.
6346  // Unnamed arguments for vararg functions always go to GPRs and
6347  // then the parameter save area. For now, put all arguments to vararg
6348  // routines always in both locations (FPR *and* GPR or stack slot).
6349  bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6350  bool NeededLoad = false;
6351 
6352  // First load the argument into the next available FPR.
6353  if (FPR_idx != NumFPRs)
6354  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6355 
6356  // Next, load the argument into GPR or stack slot if needed.
6357  if (!NeedGPROrStack)
6358  ;
6359  else if (GPR_idx != NumGPRs && !IsFastCall) {
6360  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6361  // once we support fp <-> gpr moves.
6362 
6363  // In the non-vararg case, this can only ever happen in the
6364  // presence of f32 array types, since otherwise we never run
6365  // out of FPRs before running out of GPRs.
6366  SDValue ArgVal;
6367 
6368  // Double values are always passed in a single GPR.
6369  if (Arg.getValueType() != MVT::f32) {
6370  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6371 
6372  // Non-array float values are extended and passed in a GPR.
6373  } else if (!Flags.isInConsecutiveRegs()) {
6374  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6375  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6376 
6377  // If we have an array of floats, we collect every odd element
6378  // together with its predecessor into one GPR.
6379  } else if (ArgOffset % PtrByteSize != 0) {
6380  SDValue Lo, Hi;
6381  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6382  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6383  if (!isLittleEndian)
6384  std::swap(Lo, Hi);
6385  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6386 
6387  // The final element, if even, goes into the first half of a GPR.
6388  } else if (Flags.isInConsecutiveRegsLast()) {
6389  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6390  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6391  if (!isLittleEndian)
6392  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6393  DAG.getConstant(32, dl, MVT::i32));
6394 
6395  // Non-final even elements are skipped; they will be handled
6396  // together the with subsequent argument on the next go-around.
6397  } else
6398  ArgVal = SDValue();
6399 
6400  if (ArgVal.getNode())
6401  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6402  } else {
6403  if (IsFastCall)
6404  ComputePtrOff();
6405 
6406  // Single-precision floating-point values are mapped to the
6407  // second (rightmost) word of the stack doubleword.
6408  if (Arg.getValueType() == MVT::f32 &&
6409  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6410  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6411  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6412  }
6413 
6414  assert(HasParameterArea &&
6415  "Parameter area must exist to pass an argument in memory.");
6416  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6417  true, CFlags.IsTailCall, false, MemOpChains,
6418  TailCallArguments, dl);
6419 
6420  NeededLoad = true;
6421  }
6422  // When passing an array of floats, the array occupies consecutive
6423  // space in the argument area; only round up to the next doubleword
6424  // at the end of the array. Otherwise, each float takes 8 bytes.
6425  if (!IsFastCall || NeededLoad) {
6426  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6427  Flags.isInConsecutiveRegs()) ? 4 : 8;
6428  if (Flags.isInConsecutiveRegsLast())
6429  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6430  }
6431  break;
6432  }
6433  case MVT::v4f32:
6434  case MVT::v4i32:
6435  case MVT::v8i16:
6436  case MVT::v16i8:
6437  case MVT::v2f64:
6438  case MVT::v2i64:
6439  case MVT::v1i128:
6440  case MVT::f128:
6441  // These can be scalar arguments or elements of a vector array type
6442  // passed directly. The latter are used to implement ELFv2 homogenous
6443  // vector aggregates.
6444 
6445  // For a varargs call, named arguments go into VRs or on the stack as
6446  // usual; unnamed arguments always go to the stack or the corresponding
6447  // GPRs when within range. For now, we always put the value in both
6448  // locations (or even all three).
6449  if (CFlags.IsVarArg) {
6450  assert(HasParameterArea &&
6451  "Parameter area must exist if we have a varargs call.");
6452  // We could elide this store in the case where the object fits
6453  // entirely in R registers. Maybe later.
6454  SDValue Store =
6455  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6456  MemOpChains.push_back(Store);
6457  if (VR_idx != NumVRs) {
6458  SDValue Load =
6459  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6460  MemOpChains.push_back(Load.getValue(1));
6461  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6462  }
6463  ArgOffset += 16;
6464  for (unsigned i=0; i<16; i+=PtrByteSize) {
6465  if (GPR_idx == NumGPRs)
6466  break;
6467  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6468  DAG.getConstant(i, dl, PtrVT));
6469  SDValue Load =
6470  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6471  MemOpChains.push_back(Load.getValue(1));
6472  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6473  }
6474  break;
6475  }
6476 
6477  // Non-varargs Altivec params go into VRs or on the stack.
6478  if (VR_idx != NumVRs) {
6479  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6480  } else {
6481  if (IsFastCall)
6482  ComputePtrOff();
6483 
6484  assert(HasParameterArea &&
6485  "Parameter area must exist to pass an argument in memory.");
6486  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6487  true, CFlags.IsTailCall, true, MemOpChains,
6488  TailCallArguments, dl);
6489  if (IsFastCall)
6490  ArgOffset += 16;
6491  }
6492 
6493  if (!IsFastCall)
6494  ArgOffset += 16;
6495  break;
6496  }
6497  }
6498 
6499  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6500  "mismatch in size of parameter area");
6501  (void)NumBytesActuallyUsed;
6502 
6503  if (!MemOpChains.empty())
6504  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6505 
6506  // Check if this is an indirect call (MTCTR/BCTRL).
6507  // See prepareDescriptorIndirectCall and buildCallOperands for more
6508  // information about calls through function pointers in the 64-bit SVR4 ABI.
6509  if (CFlags.IsIndirect) {
6510  // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6511  // caller in the TOC save area.
6512  if (isTOCSaveRestoreRequired(Subtarget)) {
6513  assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6514  // Load r2 into a virtual register and store it to the TOC save area.
6515  setUsesTOCBasePtr(DAG);
6516  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6517  // TOC save area offset.
6518  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6519  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6520  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6521  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6523  DAG.getMachineFunction(), TOCSaveOffset));
6524  }
6525  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6526  // This does not mean the MTCTR instruction must use R12; it's easier
6527  // to model this as an extra parameter, so do that.
6528  if (isELFv2ABI && !CFlags.IsPatchPoint)
6529  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6530  }
6531 
6532  // Build a sequence of copy-to-reg nodes chained together with token chain
6533  // and flag operands which copy the outgoing args into the appropriate regs.
6534  SDValue InFlag;
6535  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6536  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6537  RegsToPass[i].second, InFlag);
6538  InFlag = Chain.getValue(1);
6539  }
6540 
6541  if (CFlags.IsTailCall && !IsSibCall)
6542  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6543  TailCallArguments);
6544 
6545  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6546  Callee, SPDiff, NumBytes, Ins, InVals, CB);
6547 }
6548 
6549 // Returns true when the shadow of a general purpose argument register
6550 // in the parameter save area is aligned to at least 'RequiredAlign'.
6551 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6552  assert(RequiredAlign.value() <= 16 &&
6553  "Required alignment greater than stack alignment.");
6554  switch (Reg) {
6555  default:
6556  report_fatal_error("called on invalid register.");
6557  case PPC::R5:
6558  case PPC::R9:
6559  case PPC::X3:
6560  case PPC::X5:
6561  case PPC::X7:
6562  case PPC::X9:
6563  // These registers are 16 byte aligned which is the most strict aligment
6564  // we can support.
6565  return true;
6566  case PPC::R3:
6567  case PPC::R7:
6568  case PPC::X4:
6569  case PPC::X6:
6570  case PPC::X8:
6571  case PPC::X10:
6572  // The shadow of these registers in the PSA is 8 byte aligned.
6573  return RequiredAlign <= 8;
6574  case PPC::R4:
6575  case PPC::R6:
6576  case PPC::R8:
6577  case PPC::R10:
6578  return RequiredAlign <= 4;
6579  }
6580 }
6581 
// CC_AIX - Custom calling-convention computation for AIX. Assigns the value
// described by ValVT/ArgFlags to registers and/or parameter save area (PSA)
// stack slots by appending CCValAssign locations to State. Returns false once
// the value has been assigned; returning true reports an unhandled type.
// NOTE(review): several lines of this listing were dropped by the extraction
// (gaps at 6620, 6633-6634, 6649) — consult the original source before edits.
6582 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6583  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6584  CCState &S) {
6585  AIXCCState &State = static_cast<AIXCCState &>(S);
6586  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6587  State.getMachineFunction().getSubtarget());
6588  const bool IsPPC64 = Subtarget.isPPC64();
  // Pointer-slot alignment and register width differ between PPC32 and PPC64.
6589  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6590  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6591 
6592  if (ValVT == MVT::f128)
6593  report_fatal_error("f128 is unimplemented on AIX.");
6594 
6595  if (ArgFlags.isNest())
6596  report_fatal_error("Nest arguments are unimplemented.");
6597 
6598  static const MCPhysReg GPR_32[] = {// 32-bit registers.
6599  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6600  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6601  static const MCPhysReg GPR_64[] = {// 64-bit registers.
6602  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6603  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6604 
6605  static const MCPhysReg VR[] = {// Vector registers.
6606  PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6607  PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6608  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6609 
  // By-value aggregates: passed in consecutive GPRs, spilling to the PSA once
  // the argument registers are exhausted.
6610  if (ArgFlags.isByVal()) {
6611  if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6612  report_fatal_error("Pass-by-value arguments with alignment greater than "
6613  "register width are not supported.");
6614 
6615  const unsigned ByValSize = ArgFlags.getByValSize();
6616 
6617  // An empty aggregate parameter takes up no storage and no registers,
6618  // but needs a MemLoc for a stack slot for the formal arguments side.
6619  if (ByValSize == 0) {
6621  State.getNextStackOffset(), RegVT,
6622  LocInfo));
6623  return false;
6624  }
6625 
6626  const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6627  unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
  // Hand out one GPR per pointer-sized chunk until registers run out; the
  // remainder is passed in the already-allocated stack area.
6628  for (const unsigned E = Offset + StackSize; Offset < E;
6629  Offset += PtrAlign.value()) {
6630  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6631  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6632  else {
6635  LocInfo));
6636  break;
6637  }
6638  }
6639  return false;
6640  }
6641 
6642  // Arguments always reserve parameter save area.
6643  switch (ValVT.SimpleTy) {
6644  default:
6645  report_fatal_error("Unhandled value type for argument.");
6646  case MVT::i64:
6647  // i64 arguments should have been split to i32 for PPC32.
6648  assert(IsPPC64 && "PPC32 should have split i64 values.");
6650  case MVT::i1:
6651  case MVT::i32: {
6652  const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6653  // AIX integer arguments are always passed in register width.
6654  if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6655  LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6656  : CCValAssign::LocInfo::ZExt;
6657  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6658  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6659  else
6660  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6661 
6662  return false;
6663  }
6664  case MVT::f32:
6665  case MVT::f64: {
6666  // Parameter save area (PSA) is reserved even if the float passes in fpr.
6667  const unsigned StoreSize = LocVT.getStoreSize();
6668  // Floats are always 4-byte aligned in the PSA on AIX.
6669  // This includes f64 in 64-bit mode for ABI compatibility.
6670  const unsigned Offset =
6671  State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6672  unsigned FReg = State.AllocateReg(FPR);
6673  if (FReg)
6674  State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6675 
6676  // Reserve and initialize GPRs or initialize the PSA as required.
6677  for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6678  if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6679  assert(FReg && "An FPR should be available when a GPR is reserved.");
6680  if (State.isVarArg()) {
6681  // Successfully reserved GPRs are only initialized for vararg calls.
6682  // Custom handling is required for:
6683  // f64 in PPC32 needs to be split into 2 GPRs.
6684  // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6685  State.addLoc(
6686  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6687  }
6688  } else {
6689  // If there are insufficient GPRs, the PSA needs to be initialized.
6690  // Initialization occurs even if an FPR was initialized for
6691  // compatibility with the AIX XL compiler. The full memory for the
6692  // argument will be initialized even if a prior word is saved in GPR.
6693  // A custom memLoc is used when the argument also passes in FPR so
6694  // that the callee handling can skip over it easily.
6695  State.addLoc(
6696  FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6697  LocInfo)
6698  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6699  break;
6700  }
6701  }
6702 
6703  return false;
6704  }
6705  case MVT::v4f32:
6706  case MVT::v4i32:
6707  case MVT::v8i16:
6708  case MVT::v16i8:
6709  case MVT::v2i64:
6710  case MVT::v2f64:
6711  case MVT::v1i128: {
6712  const unsigned VecSize = 16;
6713  const Align VecAlign(VecSize);
6714 
6715  if (!State.isVarArg()) {
6716  // If there are vector registers remaining we don't consume any stack
6717  // space.
6718  if (unsigned VReg = State.AllocateReg(VR)) {
6719  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6720  return false;
6721  }
6722  // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6723  // might be allocated in the portion of the PSA that is shadowed by the
6724  // GPRs.
6725  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6726  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6727  return false;
6728  }
6729 
  // Vararg vectors: passed in GPRs (and mirrored in the PSA) so va_arg can
  // find them, rather than in VRs.
6730  const unsigned PtrSize = IsPPC64 ? 8 : 4;
6731  ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6732 
6733  unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6734  // Burn any underaligned registers and their shadowed stack space until
6735  // we reach the required alignment.
6736  while (NextRegIndex != GPRs.size() &&
6737  !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6738  // Shadow allocate register and its stack shadow.
6739  unsigned Reg = State.AllocateReg(GPRs);
6740  State.AllocateStack(PtrSize, PtrAlign);
6741  assert(Reg && "Allocating register unexpectedly failed.");
6742  (void)Reg;
6743  NextRegIndex = State.getFirstUnallocated(GPRs);
6744  }
6745 
6746  // Vectors that are passed as fixed arguments are handled differently.
6747  // They are passed in VRs if any are available (unlike arguments passed
6748  // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6749  // functions)
6750  if (State.isFixed(ValNo)) {
6751  if (unsigned VReg = State.AllocateReg(VR)) {
6752  State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6753  // Shadow allocate GPRs and stack space even though we pass in a VR.
6754  for (unsigned I = 0; I != VecSize; I += PtrSize)
6755  State.AllocateReg(GPRs);
6756  State.AllocateStack(VecSize, VecAlign);
6757  return false;
6758  }
6759  // No vector registers remain so pass on the stack.
6760  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6761  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6762  return false;
6763  }
6764 
6765  // If all GPRS are consumed then we pass the argument fully on the stack.
6766  if (NextRegIndex == GPRs.size()) {
6767  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6768  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6769  return false;
6770  }
6771 
6772  // Corner case for 32-bit codegen. We have 2 registers to pass the first
6773  // half of the argument, and then need to pass the remaining half on the
6774  // stack.
6775  if (GPRs[NextRegIndex] == PPC::R9) {
6776  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6777  State.addLoc(
6778  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6779 
6780  const unsigned FirstReg = State.AllocateReg(PPC::R9);
6781  const unsigned SecondReg = State.AllocateReg(PPC::R10);
6782  assert(FirstReg && SecondReg &&
6783  "Allocating R9 or R10 unexpectedly failed.");
6784  State.addLoc(
6785  CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6786  State.addLoc(
6787  CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6788  return false;
6789  }
6790 
6791  // We have enough GPRs to fully pass the vector argument, and we have
6792  // already consumed any underaligned registers. Start with the custom
6793  // MemLoc and then the custom RegLocs.
6794  const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6795  State.addLoc(
6796  CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6797  for (unsigned I = 0; I != VecSize; I += PtrSize) {
6798  const unsigned Reg = State.AllocateReg(GPRs);
6799  assert(Reg && "Failed to allocated register for vararg vector argument");
6800  State.addLoc(
6801  CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6802  }
6803  return false;
6804  }
6805  }
6806  return true;
6807 }
6808 
6809 // So far, this function is only used by LowerFormalArguments_AIX()
// Maps a simple value type to the register class used to receive a formal
// argument of that type, selecting VSX/P8Vector float classes when the
// subtarget supports them.
// NOTE(review): the line carrying the function's return type and name (6810)
// is not visible in this listing — confirm the signature against the source.
6811  bool IsPPC64,
6812  bool HasP8Vector,
6813  bool HasVSX) {
6814  assert((IsPPC64 || SVT != MVT::i64) &&
6815  "i64 should have been split for 32-bit codegen.");
6816 
6817  switch (SVT) {
6818  default:
6819  report_fatal_error("Unexpected value type for formal argument");
6820  case MVT::i1:
6821  case MVT::i32:
6822  case MVT::i64:
  // Integers go in fixed-point registers sized by the target word width.
6823  return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6824  case MVT::f32:
6825  return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6826  case MVT::f64:
6827  return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6828  case MVT::v4f32:
6829  case MVT::v4i32:
6830  case MVT::v8i16:
6831  case MVT::v16i8:
6832  case MVT::v2i64:
6833  case MVT::v2f64:
6834  case MVT::v1i128:
  // All 128-bit vector types share the Altivec register class.
6835  return &PPC::VRRCRegClass;
6836  }
6837 }
6838 
// Narrows a scalar integer argument from its register-width LocVT back down
// to the declared ValVT. An AssertSext/AssertZext node is attached first,
// based on the argument's extension flags, so later DAG combines know the
// truncated-away bits were well-defined.
// NOTE(review): the signature's first line (6839) is not visible in this
// listing — confirm the parameter list against the source.
6840  SelectionDAG &DAG, SDValue ArgValue,
6841  MVT LocVT, const SDLoc &dl) {
6842  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6843  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6844 
6845  if (Flags.isSExt())
6846  ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6847  DAG.getValueType(ValVT));
6848  else if (Flags.isZExt())
6849  ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6850  DAG.getValueType(ValVT));
6851 
6852  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6853 }
6854 
6855 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6856  const unsigned LASize = FL->getLinkageSize();
6857 
6858  if (PPC::GPRCRegClass.contains(Reg)) {
6859  assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6860  "Reg must be a valid argument register!");
6861  return LASize + 4 * (Reg - PPC::R3);
6862  }
6863 
6864  if (PPC::G8RCRegClass.contains(Reg)) {
6865  assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6866  "Reg must be a valid argument register!");
6867  return LASize + 8 * (Reg - PPC::X3);
6868  }
6869 
6870  llvm_unreachable("Only general purpose registers expected.");
6871 }
6872 
6873 // AIX ABI Stack Frame Layout:
6874 //
6875 // Low Memory +--------------------------------------------+
6876 // SP +---> | Back chain | ---+
6877 // | +--------------------------------------------+ |
6878 // | | Saved Condition Register | |
6879 // | +--------------------------------------------+ |
6880 // | | Saved Linkage Register | |
6881 // | +--------------------------------------------+ | Linkage Area
6882 // | | Reserved for compilers | |
6883 // | +--------------------------------------------+ |
6884 // | | Reserved for binders | |
6885 // | +--------------------------------------------+ |
6886 // | | Saved TOC pointer | ---+
6887 // | +--------------------------------------------+
6888 // | | Parameter save area |
6889 // | +--------------------------------------------+
6890 // | | Alloca space |
6891 // | +--------------------------------------------+
6892 // | | Local variable space |
6893 // | +--------------------------------------------+
6894 // | | Float/int conversion temporary |
6895 // | +--------------------------------------------+
6896 // | | Save area for AltiVec registers |
6897 // | +--------------------------------------------+
6898 // | | AltiVec alignment padding |
6899 // | +--------------------------------------------+
6900 // | | Save area for VRSAVE register |
6901 // | +--------------------------------------------+
6902 // | | Save area for General Purpose registers |
6903 // | +--------------------------------------------+
6904 // | | Save area for Floating Point registers |
6905 // | +--------------------------------------------+
6906 // +---- | Back chain |
6907 // High Memory +--------------------------------------------+
6908 //
6909 // Specifications:
6910 // AIX 7.2 Assembler Language Reference
6911 // Subroutine linkage convention
6912 
// Lowers incoming (formal) arguments under the AIX ABI: runs CC_AIX to assign
// each argument a register and/or parameter-save-area location, materializes
// the corresponding loads/copies into InVals, records the minimum reserved
// parameter area, and, for variadic functions, spills the remaining GPR
// argument registers so va_arg can walk them in memory.
// NOTE(review): several lines of this listing were dropped by the extraction
// (gaps at 6934, 6973, 7001, 7028-7057, 7126) — consult the original source
// before modifying this function.
6913 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6914  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6915  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6916  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6917 
6918  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6919  CallConv == CallingConv::Fast) &&
6920  "Unexpected calling convention!");
6921 
6922  if (getTargetMachine().Options.GuaranteedTailCallOpt)
6923  report_fatal_error("Tail call support is unimplemented on AIX.");
6924 
6925  if (useSoftFloat())
6926  report_fatal_error("Soft float support is unimplemented on AIX.");
6927 
6928  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
6929 
6930  const bool IsPPC64 = Subtarget.isPPC64();
6931  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6932 
6933  // Assign locations to all of the incoming arguments.
6935  MachineFunction &MF = DAG.getMachineFunction();
6936  MachineFrameInfo &MFI = MF.getFrameInfo();
6937  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6938  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6939 
6940  const EVT PtrVT = getPointerTy(MF.getDataLayout());
6941  // Reserve space for the linkage area on the stack.
6942  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6943  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6944  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6945 
6946  SmallVector<SDValue, 8> MemOps;
6947 
6948  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6949  CCValAssign &VA = ArgLocs[I++];
6950  MVT LocVT = VA.getLocVT();
6951  MVT ValVT = VA.getValVT();
6952  ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6953  // For compatibility with the AIX XL compiler, the float args in the
6954  // parameter save area are initialized even if the argument is available
6955  // in register. The caller is required to initialize both the register
6956  // and memory, however, the callee can choose to expect it in either.
6957  // The memloc is dismissed here because the argument is retrieved from
6958  // the register.
6959  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
6960  continue;
6961 
  // Loads a stack-passed argument from its fixed stack slot into InVals.
6962  auto HandleMemLoc = [&]() {
6963  const unsigned LocSize = LocVT.getStoreSize();
6964  const unsigned ValSize = ValVT.getStoreSize();
6965  assert((ValSize <= LocSize) &&
6966  "Object size is larger than size of MemLoc");
6967  int CurArgOffset = VA.getLocMemOffset();
6968  // Objects are right-justified because AIX is big-endian.
6969  if (LocSize > ValSize)
6970  CurArgOffset += LocSize - ValSize;
6971  // Potential tail calls could cause overwriting of argument stack slots.
6972  const bool IsImmutable =
6974  (CallConv == CallingConv::Fast));
6975  int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6976  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6977  SDValue ArgValue =
6978  DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6979  InVals.push_back(ArgValue);
6980  };
6981 
6982  // Vector arguments to VaArg functions are passed both on the stack, and
6983  // in any available GPRs. Load the value from the stack and add the GPRs
6984  // as live ins.
6985  if (VA.isMemLoc() && VA.needsCustom()) {
6986  assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
6987  assert(isVarArg && "Only use custom memloc for vararg.");
6988  // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
6989  // matching custom RegLocs.
6990  const unsigned OriginalValNo = VA.getValNo();
6991  (void)OriginalValNo;
6992 
  // Consumes one custom RegLoc paired with this MemLoc, marking the
  // physical register live-in with the appropriate register class.
6993  auto HandleCustomVecRegLoc = [&]() {
6994  assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
6995  "Missing custom RegLoc.");
6996  VA = ArgLocs[I++];
6997  assert(VA.getValVT().isVector() &&
6998  "Unexpected Val type for custom RegLoc.");
6999  assert(VA.getValNo() == OriginalValNo &&
7000  "ValNo mismatch between custom MemLoc and RegLoc.");
7002  MF.addLiveIn(VA.getLocReg(),
7003  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7004  Subtarget.hasVSX()));
7005  };
7006 
7007  HandleMemLoc();
7008  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7009  // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7010  // R10.
7011  HandleCustomVecRegLoc();
7012  HandleCustomVecRegLoc();
7013 
7014  // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7015  // we passed the vector in R5, R6, R7 and R8.
7016  if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7017  assert(!IsPPC64 &&
7018  "Only 2 custom RegLocs expected for 64-bit codegen.");
7019  HandleCustomVecRegLoc();
7020  HandleCustomVecRegLoc();
7021  }
7022 
7023  continue;
7024  }
7025 
  // Per-type bookkeeping for register-passed arguments. NOTE(review): the
  // action lines inside these cases (7028, 7034, 7037, 7045, 7048, 7053,
  // 7057) are missing from this listing; verify against the source.
7026  if (VA.isRegLoc()) {
7027  if (VA.getValVT().isScalarInteger())
7029  else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7030  switch (VA.getValVT().SimpleTy) {
7031  default:
7032  report_fatal_error("Unhandled value type for argument.");
7033  case MVT::f32:
7035  break;
7036  case MVT::f64:
7038  break;
7039  }
7040  } else if (VA.getValVT().isVector()) {
7041  switch (VA.getValVT().SimpleTy) {
7042  default:
7043  report_fatal_error("Unhandled value type for argument.");
7044  case MVT::v16i8:
7046  break;
7047  case MVT::v8i16:
7049  break;
7050  case MVT::v4i32:
7051  case MVT::v2i64:
7052  case MVT::v1i128:
7054  break;
7055  case MVT::v4f32:
7056  case MVT::v2f64:
7058  break;
7059  }
7060  }
7061  }
7062 
  // ByVal passed fully in memory: hand the caller's slot back as the address.
7063  if (Flags.isByVal() && VA.isMemLoc()) {
7064  const unsigned Size =
7065  alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7066  PtrByteSize);
7067  const int FI = MF.getFrameInfo().CreateFixedObject(
7068  Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7069  /* IsAliased */ true);
7070  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7071  InVals.push_back(FIN);
7072 
7073  continue;
7074  }
7075 
  // ByVal passed (at least partially) in registers: store each register to
  // its PSA shadow slot so the aggregate is addressable in memory.
7076  if (Flags.isByVal()) {
7077  assert(VA.isRegLoc() && "MemLocs should already be handled.");
7078 
7079  const MCPhysReg ArgReg = VA.getLocReg();
7080  const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7081 
7082  if (Flags.getNonZeroByValAlign() > PtrByteSize)
7083  report_fatal_error("Over aligned byvals not supported yet.");
7084 
7085  const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7086  const int FI = MF.getFrameInfo().CreateFixedObject(
7087  StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7088  /* IsAliased */ true);
7089  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7090  InVals.push_back(FIN);
7091 
7092  // Add live ins for all the RegLocs for the same ByVal.
7093  const TargetRegisterClass *RegClass =
7094  IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7095 
7096  auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7097  unsigned Offset) {
7098  const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7099  // Since the callers side has left justified the aggregate in the
7100  // register, we can simply store the entire register into the stack
7101  // slot.
7102  SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7103  // The store to the fixedstack object is needed because accessing a
7104  // field of the ByVal will use a gep and load. Ideally we will optimize
7105  // to extracting the value from the register directly, and elide the
7106  // stores when the arguments address is not taken, but that will need to
7107  // be future work.
7108  SDValue Store = DAG.getStore(
7109  CopyFrom.getValue(1), dl, CopyFrom,
7110  DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7111  MachinePointerInfo::getFixedStack(MF, FI, Offset));
7112 
7113  MemOps.push_back(Store);
7114  };
7115 
7116  unsigned Offset = 0;
7117  HandleRegLoc(VA.getLocReg(), Offset);
7118  Offset += PtrByteSize;
7119  for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7120  Offset += PtrByteSize) {
7121  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7122  "RegLocs should be for ByVal argument.");
7123 
7124  const CCValAssign RL = ArgLocs[I++];
7125  HandleRegLoc(RL.getLocReg(), Offset);
7127  }
7128 
7129  if (Offset != StackSize) {
7130  assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7131  "Expected MemLoc for remaining bytes.");
7132  assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7133  // Consume the MemLoc. The InVal has already been emitted, so nothing
7134  // more needs to be done.
7135  ++I;
7136  }
7137 
7138  continue;
7139  }
7140 
  // Plain register argument: copy it out of its live-in vreg, narrowing
  // integers that were extended to register width.
7141  if (VA.isRegLoc() && !VA.needsCustom()) {
7142  MVT::SimpleValueType SVT = ValVT.SimpleTy;
7143  Register VReg =
7144  MF.addLiveIn(VA.getLocReg(),
7145  getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7146  Subtarget.hasVSX()));
7147  SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7148  if (ValVT.isScalarInteger() &&
7149  (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7150  ArgValue =
7151  truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7152  }
7153  InVals.push_back(ArgValue);
7154  continue;
7155  }
7156  if (VA.isMemLoc()) {
7157  HandleMemLoc();
7158  continue;
7159  }
7160  }
7161 
7162  // On AIX a minimum of 8 words is saved to the parameter save area.
7163  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7164  // Area that is at least reserved in the caller of this function.
7165  unsigned CallerReservedArea =
7166  std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7167 
7168  // Set the size that is at least reserved in caller of this function. Tail
7169  // call optimized function's reserved stack space needs to be aligned so
7170  // that taking the difference between two stack areas will result in an
7171  // aligned stack.
7172  CallerReservedArea =
7173  EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7174  FuncInfo->setMinReservedArea(CallerReservedArea);
7175 
7176  if (isVarArg) {
7177  FuncInfo->setVarArgsFrameIndex(
7178  MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7179  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7180 
7181  static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7182  PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7183 
7184  static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7185  PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7186  const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7187 
7188  // The fixed integer arguments of a variadic function are stored to the
7189  // VarArgsFrameIndex on the stack so that they may be loaded by
7190  // dereferencing the result of va_next.
7191  for (unsigned GPRIndex =
7192  (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7193  GPRIndex < NumGPArgRegs; ++GPRIndex) {
7194 
7195  const Register VReg =
7196  IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7197  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7198 
7199  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7200  SDValue Store =
7201  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7202  MemOps.push_back(Store);
7203  // Increment the address for the next argument to store.
7204  SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7205  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7206  }
7207  }
7208 
7209  if (!MemOps.empty())
7210  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7211 
7212  return Chain;
7213 }
7214 
7215 SDValue PPCTargetLowering::LowerCall_AIX(
7216  SDValue Chain, SDValue Callee, CallFlags CFlags,
7217  const SmallVectorImpl<ISD::OutputArg> &Outs,
7218  const SmallVectorImpl<SDValue> &OutVals,
7219  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7220  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7221  const CallBase *CB) const {
7222  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7223  // AIX ABI stack frame layout.
7224 
7225  assert((CFlags.CallConv == CallingConv::C ||
7226  CFlags.CallConv == CallingConv::Cold ||
7227  CFlags.CallConv == CallingConv::Fast) &&
7228  "Unexpected calling convention!");
7229 
7230  if (CFlags.IsPatchPoint)
7231  report_fatal_error("This call type is unimplemented on AIX.");
7232 
7233  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7234 
7235  MachineFunction &MF = DAG.getMachineFunction();
7237  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7238  *DAG.getContext());
7239 
7240  // Reserve space for the linkage save area (LSA) on the stack.
7241  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7242  // [SP][CR][LR][2 x reserved][TOC].
7243  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7244  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7245  const bool IsPPC64 = Subtarget.isPPC64();
7246  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7247  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7248  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7249  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7250 
7251  // The prolog code of the callee may store up to 8 GPR argument registers to
7252  // the stack, allowing va_start to index over them in memory if the callee
7253  // is variadic.
7254  // Because we cannot tell if this is needed on the caller side, we have to
7255  // conservatively assume that it is needed. As such, make sure we have at
7256  // least enough stack space for the caller to store the 8 GPRs.
7257  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7258  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7259  CCInfo.getNextStackOffset());
7260 
7261  // Adjust the stack pointer for the new arguments...
7262  // These operations are automatically eliminated by the prolog/epilog pass.
7263  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7264  SDValue CallSeqStart = Chain;
7265 
7267  SmallVector<SDValue, 8> MemOpChains;
7268 
7269  // Set up a copy of the stack pointer for loading and storing any
7270  // arguments that may not fit in the registers available for argument
7271  // passing.
7272  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7273  : DAG.getRegister(PPC::R1, MVT::i32);
7274 
7275  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7276  const unsigned ValNo = ArgLocs[I].getValNo();
7277  SDValue Arg = OutVals[ValNo];
7278  ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7279 
7280  if (Flags.isByVal()) {
7281  const unsigned ByValSize = Flags.getByValSize();
7282 
7283  // Nothing to do for zero-sized ByVals on the caller side.
7284  if (!ByValSize) {
7285  ++I;
7286  continue;
7287  }
7288 
7289  auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7290  return DAG.getExtLoad(
7291  ISD::ZEXTLOAD, dl, PtrVT, Chain,
7292  (LoadOffset != 0)
7293  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7294  : Arg,
7295  MachinePointerInfo(), VT);
7296  };
7297 
7298  unsigned LoadOffset = 0;
7299 
7300  // Initialize registers, which are fully occupied by the by-val argument.
7301  while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7302  SDValue Load = GetLoad(PtrVT, LoadOffset);
7303  MemOpChains.push_back(Load.getValue(1));
7304  LoadOffset += PtrByteSize;
7305  const CCValAssign &ByValVA = ArgLocs[I++];
7306  assert(ByValVA.getValNo() == ValNo &&
7307  "Unexpected location for pass-by-value argument.");
7308  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7309  }
7310 
7311  if (LoadOffset == ByValSize)
7312  continue;
7313 
7314  // There must be one more loc to handle the remainder.
7315  assert(ArgLocs[I].getValNo() == ValNo &&
7316  "Expected additional location for by-value argument.");
7317 
7318  if (ArgLocs[I].isMemLoc()) {
7319  assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7320  const CCValAssign &ByValVA = ArgLocs[I++];
7321  ISD::ArgFlagsTy MemcpyFlags = Flags;
7322  // Only memcpy the bytes that don't pass in register.
7323  MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7324  Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7325  (LoadOffset != 0)
7326  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7327  : Arg,
7328  DAG.getObjectPtrOffset(dl, StackPtr,
7329  TypeSize::Fixed(ByValVA.getLocMemOffset())),
7330  CallSeqStart, MemcpyFlags, DAG, dl);
7331  continue;
7332  }
7333 
7334  // Initialize the final register residue.
7335  // Any residue that occupies the final by-val arg register must be
7336  // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7337  // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7338  // 2 and 1 byte loads.
7339  const unsigned ResidueBytes = ByValSize % PtrByteSize;
7340  assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7341  "Unexpected register residue for by-value argument.");
7342  SDValue ResidueVal;
7343  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7344  const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7345  const MVT VT =
7346  N == 1 ? MVT::i8
7347  : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7348  SDValue Load = GetLoad(VT, LoadOffset);
7349  MemOpChains.push_back(Load.getValue(1));
7350  LoadOffset += N;
7351  Bytes += N;
7352 
7353  // By-val arguments are passed left-justfied in register.
7354  // Every load here needs to be shifted, otherwise a full register load
7355  // should have been used.
7356  assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7357  "Unexpected load emitted during handling of pass-by-value "
7358  "argument.");
7359  unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7360  EVT ShiftAmountTy =
7361  getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7362  SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7363  SDValue ShiftedLoad =
7364  DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7365  ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7366  ShiftedLoad)
7367  : ShiftedLoad;
7368  }
7369 
7370  const CCValAssign &ByValVA = ArgLocs[I++];
7371  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7372  continue;
7373  }
7374 
7375  CCValAssign &VA = ArgLocs[I++];
7376  const MVT LocVT = VA.getLocVT();
7377  const MVT ValVT = VA.getValVT();
7378 
7379  switch (VA.getLocInfo()) {
7380  default:
7381  report_fatal_error("Unexpected argument extension type.");
7382  case CCValAssign::Full:
7383  break;
7384  case CCValAssign::ZExt:
7385  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7386  break;
7387  case CCValAssign::SExt:
7388  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7389  break;
7390  }
7391 
7392  if (VA.isRegLoc() && !VA.needsCustom()) {
7393  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7394  continue;
7395  }
7396 
7397  // Vector arguments passed to VarArg functions need custom handling when
7398  // they are passed (at least partially) in GPRs.
7399  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7400  assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7401  // Store value to its stack slot.
7402  SDValue PtrOff =
7403  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7404  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7405  SDValue Store =
7406  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7407  MemOpChains.push_back(Store);
7408  const unsigned OriginalValNo = VA.getValNo();
7409  // Then load the GPRs from the stack
7410  unsigned LoadOffset = 0;
7411  auto HandleCustomVecRegLoc = [&]() {
7412  assert(I != E && "Unexpected end of CCvalAssigns.");
7413  assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7414  "Expected custom RegLoc.");
7415  CCValAssign RegVA = ArgLocs[I++];
7416  assert(RegVA.getValNo() == OriginalValNo &&
7417  "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7418  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7419  DAG.getConstant(LoadOffset, dl, PtrVT));
7420  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7421  MemOpChains.push_back(Load.getValue(1));
7422  RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7423  LoadOffset += PtrByteSize;
7424  };
7425 
7426  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7427  // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7428  // R10.
7429  HandleCustomVecRegLoc();
7430  HandleCustomVecRegLoc();
7431 
7432  if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7433  ArgLocs[I].getValNo() == OriginalValNo) {
7434  assert(!IsPPC64 &&
7435  "Only 2 custom RegLocs expected for 64-bit codegen.");
7436  HandleCustomVecRegLoc();
7437  HandleCustomVecRegLoc();
7438  }
7439 
7440  continue;
7441  }
7442 
7443  if (VA.isMemLoc()) {
7444  SDValue PtrOff =
7445  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7446  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7447  MemOpChains.push_back(
7448  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7449 
7450  continue;
7451  }
7452 
7453  if (!ValVT.isFloatingPoint())
7455  "Unexpected register handling for calling convention.");
7456 
7457  // Custom handling is used for GPR initializations for vararg float
7458  // arguments.
7459  assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7460  LocVT.isInteger() &&
7461  "Custom register handling only expected for VarArg.");
7462 
7463  SDValue ArgAsInt =
7465 
7466  if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7467  // f32 in 32-bit GPR
7468  // f64 in 64-bit GPR
7469  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7470  else if (Arg.getValueType().getFixedSizeInBits() <
7471  LocVT.getFixedSizeInBits())
7472  // f32 in 64-bit GPR.
7473  RegsToPass.push_back(std::make_pair(
7474  VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7475  else {
7476  // f64 in two 32-bit GPRs
7477  // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7478  assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7479  "Unexpected custom register for argument!");
7480  CCValAssign &GPR1 = VA;
7481  SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7482  DAG.getConstant(32, dl, MVT::i8));
7483  RegsToPass.push_back(std::make_pair(
7484  GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7485 
7486  if (I != E) {
7487  // If only 1 GPR was available, there will only be one custom GPR and
7488  // the argument will also pass in memory.
7489  CCValAssign &PeekArg = ArgLocs[I];
7490  if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7491  assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7492  CCValAssign &GPR2 = ArgLocs[I++];
7493  RegsToPass.push_back(std::make_pair(
7494  GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7495  }
7496  }
7497  }
7498  }
7499 
7500  if (!MemOpChains.empty())
7501  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7502 
7503  // For indirect calls, we need to save the TOC base to the stack for
7504  // restoration after the call.
7505  if (CFlags.IsIndirect) {
7506  assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7507  const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7508  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7509  const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7510  const unsigned TOCSaveOffset =
7511  Subtarget.getFrameLowering()->getTOCSaveOffset();
7512 
7513  setUsesTOCBasePtr(DAG);
7514  SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7515  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7516  SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7517  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7518  Chain = DAG.getStore(
7519  Val.getValue(1), dl, Val, AddPtr,
7520  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7521  }
7522 
7523  // Build a sequence of copy-to-reg nodes chained together with token chain
7524  // and flag operands which copy the outgoing args into the appropriate regs.
7525  SDValue InFlag;
7526  for (auto Reg : RegsToPass) {
7527  Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7528  InFlag = Chain.getValue(1);
7529  }
7530 
7531  const int SPDiff = 0;
7532  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7533  Callee, SPDiff, NumBytes, Ins, InVals, CB);
7534 }
7535 
7536 bool
7537 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7538  MachineFunction &MF, bool isVarArg,
7539  const SmallVectorImpl<ISD::OutputArg> &Outs,
7540  LLVMContext &Context) const {
7542  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7543  return CCInfo.CheckReturn(
7544  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7545  ? RetCC_PPC_Cold
7546  : RetCC_PPC);
7547 }
7548 
7549 SDValue
7550 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7551  bool isVarArg,
7552  const SmallVectorImpl<ISD::OutputArg> &Outs,
7553  const SmallVectorImpl<SDValue> &OutVals,
7554  const SDLoc &dl, SelectionDAG &DAG) const {
7556  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7557  *DAG.getContext());
7558  CCInfo.AnalyzeReturn(Outs,
7559  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7560  ? RetCC_PPC_Cold
7561  : RetCC_PPC);
7562 
7563  SDValue Flag;
7564  SmallVector<SDValue, 4> RetOps(1, Chain);
7565 
7566  // Copy the result values into the output registers.
7567  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7568  CCValAssign &VA = RVLocs[i];
7569  assert(VA.isRegLoc() && "Can only return in registers!");
7570 
7571  SDValue Arg = OutVals[RealResIdx];
7572 
7573  switch (VA.getLocInfo()) {
7574  default: llvm_unreachable("Unknown loc info!");
7575  case CCValAssign::Full: break;
7576  case CCValAssign::AExt:
7577  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7578  break;
7579  case CCValAssign::ZExt:
7580  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7581  break;
7582  case CCValAssign::SExt:
7583  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7584  break;
7585  }
7586  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7587  bool isLittleEndian = Subtarget.isLittleEndian();
7588  // Legalize ret f64 -> ret 2 x i32.
7589  SDValue SVal =
7591  DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7592  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7593  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7594  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7595  DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7596  Flag = Chain.getValue(1);
7597  VA = RVLocs[++i]; // skip ahead to next loc
7598  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7599  } else
7600  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7601  Flag = Chain.getValue(1);
7602  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7603  }
7604 
7605  RetOps[0] = Chain; // Update chain.
7606 
7607  // Add the flag if we have it.
7608  if (Flag.getNode())
7609  RetOps.push_back(Flag);
7610 
7611  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7612 }
7613 
7614 SDValue
7615 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7616  SelectionDAG &DAG) const {
7617  SDLoc dl(Op);
7618 
7619  // Get the correct type for integers.
7620  EVT IntVT = Op.getValueType();
7621 
7622  // Get the inputs.
7623  SDValue Chain = Op.getOperand(0);
7624  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7625  // Build a DYNAREAOFFSET node.
7626  SDValue Ops[2] = {Chain, FPSIdx};
7627  SDVTList VTs = DAG.getVTList(IntVT);
7628  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7629 }
7630 
7631 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7632  SelectionDAG &DAG) const {
7633  // When we pop the dynamic allocation we need to restore the SP link.
7634  SDLoc dl(Op);
7635 
7636  // Get the correct type for pointers.
7637  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7638 
7639  // Construct the stack pointer operand.
7640  bool isPPC64 = Subtarget.isPPC64();
7641  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7642  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7643 
7644  // Get the operands for the STACKRESTORE.
7645  SDValue Chain = Op.getOperand(0);
7646  SDValue SaveSP = Op.getOperand(1);
7647 
7648  // Load the old link SP.
7649  SDValue LoadLinkSP =
7650  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7651 
7652  // Restore the stack pointer.
7653  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7654 
7655  // Store the old link SP.
7656  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7657 }
7658 
7659 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7660  MachineFunction &MF = DAG.getMachineFunction();
7661  bool isPPC64 = Subtarget.isPPC64();
7662  EVT PtrVT = getPointerTy(MF.getDataLayout());
7663 
7664  // Get current frame pointer save index. The users of this index will be
7665  // primarily DYNALLOC instructions.
7667  int RASI = FI->getReturnAddrSaveIndex();
7668 
7669  // If the frame pointer save index hasn't been defined yet.
7670  if (!RASI) {
7671  // Find out what the fix offset of the frame pointer save area.
7672  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7673  // Allocate the frame index for frame pointer save area.
7674  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7675  // Save the result.
7676  FI->setReturnAddrSaveIndex(RASI);
7677  }
7678  return DAG.getFrameIndex(RASI, PtrVT);
7679 }
7680 
7681 SDValue
7682 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7683  MachineFunction &MF = DAG.getMachineFunction();
7684  bool isPPC64 = Subtarget.isPPC64();
7685  EVT PtrVT = getPointerTy(MF.getDataLayout());
7686 
7687  // Get current frame pointer save index. The users of this index will be
7688  // primarily DYNALLOC instructions.
7690  int FPSI = FI->getFramePointerSaveIndex();
7691 
7692  // If the frame pointer save index hasn't been defined yet.
7693  if (!FPSI) {
7694  // Find out what the fix offset of the frame pointer save area.
7695  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7696  // Allocate the frame index for frame pointer save area.
7697  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7698  // Save the result.
7699  FI->setFramePointerSaveIndex(FPSI);
7700  }
7701  return DAG.getFrameIndex(FPSI, PtrVT);
7702 }
7703 
7704 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7705  SelectionDAG &DAG) const {
7706  MachineFunction &MF = DAG.getMachineFunction();
7707  // Get the inputs.
7708  SDValue Chain = Op.getOperand(0);
7709  SDValue Size = Op.getOperand(1);
7710  SDLoc dl(Op);
7711 
7712  // Get the correct type for pointers.
7713  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7714  // Negate the size.
7715  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7716  DAG.getConstant(0, dl, PtrVT), Size);
7717  // Construct a node for the frame pointer save index.
7718  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7719  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7720  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7721  if (hasInlineStackProbe(MF))
7722  return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7723  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7724 }
7725 
7726 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7727  SelectionDAG &DAG) const {
7728  MachineFunction &MF = DAG.getMachineFunction();
7729 
7730  bool isPPC64 = Subtarget.isPPC64();
7731  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7732 
7733  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7734  return DAG.getFrameIndex(FI, PtrVT);
7735 }
7736 
7737 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7738  SelectionDAG &DAG) const {
7739  SDLoc DL(Op);
7740  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7742  Op.getOperand(0), Op.getOperand(1));
7743 }
7744 
7745 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7746  SelectionDAG &DAG) const {
7747  SDLoc DL(Op);
7749  Op.getOperand(0), Op.getOperand(1));
7750 }
7751 
7752 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7753  if (Op.getValueType().isVector())
7754  return LowerVectorLoad(Op, DAG);
7755 
7756  assert(Op.getValueType() == MVT::i1 &&
7757  "Custom lowering only for i1 loads");
7758 
7759  // First, load 8 bits into 32 bits, then truncate to 1 bit.
7760 
7761  SDLoc dl(Op);
7762  LoadSDNode *LD = cast<LoadSDNode>(Op);
7763 
7764  SDValue Chain = LD->getChain();
7765  SDValue BasePtr = LD->getBasePtr();
7766  MachineMemOperand *MMO = LD->getMemOperand();
7767 
7768  SDValue NewLD =
7769  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7770  BasePtr, MVT::i8, MMO);
7771  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7772 
7773  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7774  return DAG.getMergeValues(Ops, dl);
7775 }
7776 
7777 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7778  if (Op.getOperand(1).getValueType().isVector())
7779  return LowerVectorStore(Op, DAG);
7780 
7781  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7782  "Custom lowering only for i1 stores");
7783 
7784  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7785 
7786  SDLoc dl(Op);
7787  StoreSDNode *ST = cast<StoreSDNode>(Op);
7788 
7789  SDValue Chain = ST->getChain();
7790  SDValue BasePtr = ST->getBasePtr();
7791  SDValue Value = ST->getValue();
7792  MachineMemOperand *MMO = ST->getMemOperand();
7793 
7795  Value);
7796  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7797 }
7798 
7799 // FIXME: Remove this once the ANDI glue bug is fixed:
7800 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7801  assert(Op.getValueType() == MVT::i1 &&
7802  "Custom lowering only for i1 results");
7803 
7804  SDLoc DL(Op);
7805  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7806 }
7807 
7808 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7809  SelectionDAG &DAG) const {
7810 
7811  // Implements a vector truncate that fits in a vector register as a shuffle.
7812  // We want to legalize vector truncates down to where the source fits in
7813  // a vector register (and target is therefore smaller than vector register
7814  // size). At that point legalization will try to custom lower the sub-legal
7815  // result and get here - where we can contain the truncate as a single target
7816  // operation.
7817 
7818  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7819  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7820  //
7821  // We will implement it for big-endian ordering as this (where x denotes
7822  // undefined):
7823  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7824  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7825  //
7826  // The same operation in little-endian ordering will be:
7827  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7828  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7829 
7830  EVT TrgVT = Op.getValueType();
7831  assert(TrgVT.isVector() && "Vector type expected.");
7832  unsigned TrgNumElts = TrgVT.getVectorNumElements();
7833  EVT EltVT = TrgVT.getVectorElementType();
7834  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7835  TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7836  !isPowerOf2_32(EltVT.getSizeInBits()))
7837  return SDValue();
7838 
7839  SDValue N1 = Op.getOperand(0);
7840  EVT SrcVT = N1.getValueType();
7841  unsigned SrcSize = SrcVT.getSizeInBits();
7842  if (SrcSize > 256 ||
7843  !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7845  return SDValue();
7846  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7847  return SDValue();
7848 
7849  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7850  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7851 
7852  SDLoc DL(Op);
7853  SDValue Op1, Op2;
7854  if (SrcSize == 256) {
7855  EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7856  EVT SplitVT =
7858  unsigned SplitNumElts = SplitVT.getVectorNumElements();
7859  Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7860  DAG.getConstant(0, DL, VecIdxTy));
7861  Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7862  DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7863  }
7864  else {
7865  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7866  Op2 = DAG.getUNDEF(WideVT);
7867  }
7868 
7869  // First list the elements we want to keep.
7870  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7871  SmallVector<int, 16> ShuffV;
7872  if (Subtarget.isLittleEndian())
7873  for (unsigned i = 0; i < TrgNumElts; ++i)
7874  ShuffV.push_back(i * SizeMult);
7875  else
7876  for (unsigned i = 1; i <= TrgNumElts; ++i)
7877  ShuffV.push_back(i * SizeMult - 1);
7878 
7879  // Populate the remaining elements with undefs.
7880  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7881  // ShuffV.push_back(i + WideNumElts);
7882  ShuffV.push_back(WideNumElts + 1);
7883 
7884  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7885  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7886  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7887 }
7888 
7889 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7890 /// possible.
7891 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7892  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7893  EVT ResVT = Op.getValueType();
7894  EVT CmpVT = Op.getOperand(0).getValueType();
7895  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7896  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7897  SDLoc dl(Op);
7898 
7899  // Without power9-vector, we don't have native instruction for f128 comparison.
7900  // Following transformation to libcall is needed for setcc:
7901  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7902  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7903  SDValue Z = DAG.getSetCC(
7904  dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7905  LHS, RHS, CC);
7906  SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7907  return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7908  }
7909 
7910  // Not FP, or using SPE? Not a fsel.
7911  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7912  Subtarget.hasSPE())
7913  return Op;
7914 
7915  SDNodeFlags Flags = Op.getNode()->getFlags();
7916 
7917  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
7918  // presence of infinities.
7919  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7920  switch (CC) {
7921  default:
7922  break;
7923  case ISD::SETOGT:
7924  case ISD::SETGT:
7925  return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
7926  case ISD::SETOLT:
7927  case ISD::SETLT:
7928  return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
7929  }
7930  }
7931 
7932  // We might be able to do better than this under some circumstances, but in
7933  // general, fsel-based lowering of select is a finite-math-only optimization.
7934  // For more information, see section F.3 of the 2.06 ISA specification.
7935  // With ISA 3.0
7936  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7937  (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7938  return Op;
7939 
7940  // If the RHS of the comparison is a 0.0, we don't need to do the
7941  // subtraction at all.
7942  SDValue Sel1;
7943  if (isFloatingPointZero(RHS))
7944  switch (CC) {
7945  default: break; // SETUO etc aren't handled by fsel.
7946  case ISD::SETNE:
7947  std::swap(TV, FV);
7949  case ISD::SETEQ:
7950  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7951  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7952  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7953  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7954  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7955  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7956  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7957  case ISD::SETULT:
7958  case ISD::SETLT:
7959  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7961  case ISD::SETOGE:
7962  case ISD::SETGE:
7963  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7964  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7965  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7966  case ISD::SETUGT:
7967  case ISD::SETGT:
7968  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7970  case ISD::SETOLE:
7971  case ISD::SETLE:
7972  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7973  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7974  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7975  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7976  }
7977 
7978  SDValue Cmp;
7979  switch (CC) {
7980  default: break; // SETUO etc aren't handled by fsel.
7981  case ISD::SETNE:
7982  std::swap(TV, FV);
7984  case ISD::SETEQ:
7985  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7986  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7987  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7988  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7989  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7990  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7991  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7992  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7993  case ISD::SETULT:
7994  case ISD::SETLT:
7995  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7996  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7997  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7998  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7999  case ISD::SETOGE:
8000  case ISD::SETGE:
8001  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8002  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8003  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8004  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8005  case ISD::SETUGT:
8006  case ISD::SETGT:
8007  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8008  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8009  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8010  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8011  case ISD::SETOLE:
8012  case ISD::SETLE:
8013  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8014  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8015  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8016  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8017  }
8018  return Op;
8019 }
8020 
8021 static unsigned getPPCStrictOpcode(unsigned Opc) {
8022  switch (Opc) {
8023  default:
8024  llvm_unreachable("No strict version of this opcode!");
8025  case PPCISD::FCTIDZ:
8026  return PPCISD::STRICT_FCTIDZ;
8027  case PPCISD::FCTIWZ:
8028  return PPCISD::STRICT_FCTIWZ;
8029  case PPCISD::FCTIDUZ:
8030  return PPCISD::STRICT_FCTIDUZ;
8031  case PPCISD::FCTIWUZ:
8032  return PPCISD::STRICT_FCTIWUZ;
8033  case PPCISD::FCFID:
8034  return PPCISD::STRICT_FCFID;
8035  case PPCISD::FCFIDU:
8036  return PPCISD::STRICT_FCFIDU;
8037  case PPCISD::FCFIDS:
8038  return PPCISD::STRICT_FCFIDS;
8039  case PPCISD::FCFIDUS:
8040  return PPCISD::STRICT_FCFIDUS;
8041  }
8042 }
8043 
8045  const PPCSubtarget &Subtarget) {
8046  SDLoc dl(Op);
8047  bool IsStrict = Op->isStrictFPOpcode();
8048  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8049  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8050 
8051  // TODO: Any other flags to propagate?
8052  SDNodeFlags Flags;
8053  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8054 
8055  // For strict nodes, source is the second operand.
8056  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8057  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8058  assert(Src.getValueType().isFloatingPoint());
8059  if (Src.getValueType() == MVT::f32) {
8060  if (IsStrict) {
8061  Src =
8063  DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8064  Chain = Src.getValue(1);
8065  } else
8066  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8067  }
8068  SDValue Conv;
8069  unsigned Opc = ISD::DELETED_NODE;
8070  switch (Op.getSimpleValueType().SimpleTy) {
8071  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8072  case MVT::i32:
8073  Opc = IsSigned ? PPCISD::FCTIWZ
8074  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8075  break;
8076  case MVT::i64:
8077  assert((IsSigned || Subtarget.hasFPCVT()) &&
8078  "i64 FP_TO_UINT is supported only with FPCVT");
8079  Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8080  }
8081  if (IsStrict) {
8082  Opc = getPPCStrictOpcode(Opc);
8083  Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8084  {Chain, Src}, Flags);
8085  } else {
8086  Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8087  }
8088  return Conv;
8089 }
8090 
8091 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8092  SelectionDAG &DAG,
8093  const SDLoc &dl) const {
8094  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8095  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8096  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8097  bool IsStrict = Op->isStrictFPOpcode();
8098 
8099  // Convert the FP value to an int value through memory.
8100  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8101  (IsSigned || Subtarget.hasFPCVT());
8102  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8103  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8104  MachinePointerInfo MPI =
8106 
8107  // Emit a store to the stack slot.
8108  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8110  if (i32Stack) {
8111  MachineFunction &MF = DAG.getMachineFunction();
8112  Alignment = Align(4);
8113  MachineMemOperand *MMO =
8114  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8115  SDValue Ops[] = { Chain, Tmp, FIPtr };
8116  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8117  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8118  } else
8119  Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8120 
8121  // Result is a load from the stack slot. If loading 4 bytes, make sure to
8122  // add in a bias on big endian.
8123  if (Op.getValueType() == MVT::i32 && !i32Stack) {
8124  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8125  DAG.getConstant(4, dl, FIPtr.getValueType()));
8126  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8127  }
8128 
8129  RLI.Chain = Chain;
8130  RLI.Ptr = FIPtr;
8131  RLI.MPI = MPI;
8132  RLI.Alignment = Alignment;
8133 }
8134 
8135 /// Custom lowers floating point to integer conversions to use
8136 /// the direct move instructions available in ISA 2.07 to avoid the
8137 /// need for load/store combinations.
8138 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8139  SelectionDAG &DAG,
8140  const SDLoc &dl) const {
8141  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8142  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8143  if (Op->isStrictFPOpcode())
8144  return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8145  else
8146  return Mov;
8147 }
8148 
// Lower FP_TO_SINT/FP_TO_UINT (and their strict variants). f128 conversions
// are returned as-is (legal with P9 vector support); ppcf128 -> i32 is
// expanded by hand; otherwise a direct-move or store/load sequence is used.
// NOTE(review): this listing was extracted from rendered docs — file lines
// 8173, 8175, 8182 and 8185 (the EXTRACT_ELEMENT definitions of Lo/Hi and
// two VTList arguments) were lost in extraction; consult the original file.
8149 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8150  const SDLoc &dl) const {
8151  bool IsStrict = Op->isStrictFPOpcode();
8152  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8153  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8154  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8155  EVT SrcVT = Src.getValueType();
8156  EVT DstVT = Op.getValueType();
8157 
8158  // FP to INT conversions are legal for f128.
8159  if (SrcVT == MVT::f128)
8160  return Subtarget.hasP9Vector() ? Op : SDValue();
8161 
8162  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8163  // PPC (the libcall is not available).
8164  if (SrcVT == MVT::ppcf128) {
8165  if (DstVT == MVT::i32) {
8166  // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8167  // set other fast-math flags to FP operations in both strict and
8168  // non-strict cases. (FP_TO_SINT, FSUB)
8169  SDNodeFlags Flags;
8170  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8171 
8172  if (IsSigned) {
  // NOTE(review): lines defining Lo and Hi (the two f64 halves of the
  // ppcf128 value) were lost here in extraction.
8174  DAG.getIntPtrConstant(0, dl));
8176  DAG.getIntPtrConstant(1, dl));
8177 
8178  // Add the two halves of the long double in round-to-zero mode, and use
8179  // a smaller FP_TO_SINT.
8180  if (IsStrict) {
8181  SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8183  {Op.getOperand(0), Lo, Hi}, Flags);
8184  return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8186  {Res.getValue(1), Res}, Flags);
8187  } else {
8188  SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8189  return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8190  }
8191  } else {
  // Unsigned case: 0x41e0000000000000 is 2^31 as a double (high part of
  // the ppcf128 constant used to split the range at 2^31.
8192  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8193  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8194  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8195  SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8196  if (IsStrict) {
8197  // Sel = Src < 0x80000000
8198  // FltOfs = select Sel, 0.0, 0x80000000
8199  // IntOfs = select Sel, 0, 0x80000000
8200  // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8201  SDValue Chain = Op.getOperand(0);
8202  EVT SetCCVT =
8203  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8204  EVT DstSetCCVT =
8205  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8206  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8207  Chain, true);
8208  Chain = Sel.getValue(1);
8209 
8210  SDValue FltOfs = DAG.getSelect(
8211  dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8212  Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8213 
8214  SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8215  DAG.getVTList(SrcVT, MVT::Other),
8216  {Chain, Src, FltOfs}, Flags);
8217  Chain = Val.getValue(1);
8218  SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8219  DAG.getVTList(DstVT, MVT::Other),
8220  {Chain, Val}, Flags);
8221  Chain = SInt.getValue(1);
8222  SDValue IntOfs = DAG.getSelect(
8223  dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8224  SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8225  return DAG.getMergeValues({Result, Chain}, dl);
8226  } else {
8227  // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8228  // FIXME: generated code sucks.
8229  SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8230  True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8231  True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8232  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8233  return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8234  }
8235  }
8236  }
8237 
  // ppcf128 to anything other than i32: let the default expansion handle it.
8238  return SDValue();
8239  }
8240 
  // With ISA 2.07 direct moves on a 64-bit target, avoid the store/load.
8241  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8242  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8243 
  // Fall back to converting via a stack slot.
8244  ReuseLoadInfo RLI;
8245  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8246 
8247  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8248  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8249 }
8250 
8251 // We're trying to insert a regular store, S, and then a load, L. If the
8252 // incoming value, O, is a load, we might just be able to have our load use the
8253 // address used by O. However, we don't know if anything else will store to
8254 // that address before we can load from it. To prevent this situation, we need
8255 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8256 // the same chain operand as O, we create a token factor from the chain results
8257 // of O and L, and we replace all uses of O's chain result with that token
8258 // factor (see spliceIntoChain below for this last part).
8259 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8260  ReuseLoadInfo &RLI,
8261  SelectionDAG &DAG,
8262  ISD::LoadExtType ET) const {
8263  // Conservatively skip reusing for constrained FP nodes.
8264  if (Op->isStrictFPOpcode())
8265  return false;
8266 
8267  SDLoc dl(Op);
8268  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8269  (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8270  if (ET == ISD::NON_EXTLOAD &&
8271  (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8272  isOperationLegalOrCustom(Op.getOpcode(),
8273  Op.getOperand(0).getValueType())) {
8274 
8275  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8276  return true;
8277  }
8278 
8279  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8280  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8281  LD->isNonTemporal())
8282  return false;
8283  if (LD->getMemoryVT() != MemVT)
8284  return false;
8285 
8286  // If the result of the load is an illegal type, then we can't build a
8287  // valid chain for reuse since the legalised loads and token factor node that
8288  // ties the legalised loads together uses a different output chain then the
8289  // illegal load.
8290  if (!isTypeLegal(LD->getValueType(0)))
8291  return false;
8292 
8293  RLI.Ptr = LD->getBasePtr();
8294  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8295  assert(LD->getAddressingMode() == ISD::PRE_INC &&
8296  "Non-pre-inc AM on PPC?");
8297  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8298  LD->getOffset());
8299  }
8300 
8301  RLI.Chain = LD->getChain();
8302  RLI.MPI = LD->getPointerInfo();
8303  RLI.IsDereferenceable = LD->isDereferenceable();
8304  RLI.IsInvariant = LD->isInvariant();
8305  RLI.Alignment = LD->getAlign();
8306  RLI.AAInfo = LD->getAAInfo();
8307  RLI.Ranges = LD->getRanges();
8308 
8309  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8310  return true;
8311 }
8312 
8313 // Given the head of the old chain, ResChain, insert a token factor containing
8314 // it and NewResChain, and make users of ResChain now be users of that token
8315 // factor.
8316 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8317 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8318  SDValue NewResChain,
8319  SelectionDAG &DAG) const {
  // Nothing to splice if the caller had no reusable chain result.
8320  if (!ResChain)
8321  return;
8322 
8323  SDLoc dl(NewResChain);
8324 
  // NOTE(review): file line 8325, which creates TF as a TokenFactor of
  // NewResChain and a temporary UNDEF placeholder, was lost in extraction.
8326  NewResChain, DAG.getUNDEF(MVT::Other));
8327  assert(TF.getNode() != NewResChain.getNode() &&
8328  "A new TF really is required here");
8329 
  // Redirect all users of the old chain to the token factor, then patch the
  // token factor's operands to be {ResChain, NewResChain} (replacing the
  // UNDEF placeholder so the RAUW above doesn't create a cycle).
8330  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8331  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8332 }
8333 
8334 /// Analyze profitability of direct move
8335 /// prefer float load to int load plus direct move
8336 /// when there is no integer use of int load
8337 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8338  SDNode *Origin = Op.getOperand(0).getNode();
8339  if (Origin->getOpcode() != ISD::LOAD)
8340  return true;
8341 
8342  // If there is no LXSIBZX/LXSIHZX, like Power8,
8343  // prefer direct move if the memory size is 1 or 2 bytes.
8344  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8345  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8346  return true;
8347 
8348  for (SDNode::use_iterator UI = Origin->use_begin(),
8349  UE = Origin->use_end();
8350  UI != UE; ++UI) {
8351 
8352  // Only look at the users of the loaded value.
8353  if (UI.getUse().get().getResNo() != 0)
8354  continue;
8355 
8356  if (UI->getOpcode() != ISD::SINT_TO_FP &&
8357  UI->getOpcode() != ISD::UINT_TO_FP &&
8358  UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8359  UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8360  return true;
8361  }
8362 
8363  return false;
8364 }
8365 
// Helper emitting the int->FP conversion node (FCFID family) for Op, taking
// the integer bits in Src; strict variants thread Chain through the node.
// NOTE(review): the opening signature line (file line 8366, declaring
// `static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,`)
// was lost in extraction.
8367  const PPCSubtarget &Subtarget,
8368  SDValue Chain = SDValue()) {
8369  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8370  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8371  SDLoc dl(Op);
8372 
8373  // TODO: Any other flags to propagate?
8374  SDNodeFlags Flags;
8375  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8376 
8377  // If we have FCFIDS, then use it when converting to single-precision.
8378  // Otherwise, convert to double-precision and then round.
8379  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8380  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8381  : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8382  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8383  if (Op->isStrictFPOpcode()) {
  // Strict nodes carry a chain; default to Op's incoming chain when the
  // caller did not supply one.
8384  if (!Chain)
8385  Chain = Op.getOperand(0);
8386  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8387  DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8388  } else
8389  return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8390 }
8391 
8392 /// Custom lowers integer to floating point conversions to use
8393 /// the direct move instructions available in ISA 2.07 to avoid the
8394 /// need for load/store combinations.
8395 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8396  SelectionDAG &DAG,
8397  const SDLoc &dl) const {
8398  assert((Op.getValueType() == MVT::f32 ||
8399  Op.getValueType() == MVT::f64) &&
8400  "Invalid floating point type as target of conversion");
8401  assert(Subtarget.hasFPCVT() &&
8402  "Int to FP conversions with direct moves require FPCVT");
8403  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8404  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8405  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8406  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8407  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8408  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8409  return convertIntToFP(Op, Mov, DAG, Subtarget);
8410 }
8411 
8412 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8413 
8414  EVT VecVT = Vec.getValueType();
8415  assert(VecVT.isVector() && "Expected a vector type.");
8416  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8417 
8418  EVT EltVT = VecVT.getVectorElementType();
8419  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8420  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8421 
8422  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8423  SmallVector<SDValue, 16> Ops(NumConcat);
8424  Ops[0] = Vec;
8425  SDValue UndefVec = DAG.getUNDEF(VecVT);
8426  for (unsigned i = 1; i < NumConcat; ++i)
8427  Ops[i] = UndefVec;
8428 
8429  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8430 }
8431 
// Lower a vector int->FP conversion (v2f64/v4f32 results) by widening the
// source to 128 bits, shuffling the significant elements into position, and
// extending (sign-extend-in-reg for signed, zero via shuffle-with-zero for
// unsigned) before emitting the conversion on the intermediate integer type.
8432 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8433  const SDLoc &dl) const {
8434  bool IsStrict = Op->isStrictFPOpcode();
8435  unsigned Opc = Op.getOpcode();
8436  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8437  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8438  Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8439  "Unexpected conversion type");
8440  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8441  "Supports conversions to v2f64/v4f32 only.");
8442 
8443  // TODO: Any other flags to propagate?
8444  SDNodeFlags Flags;
8445  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8446 
8447  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8448  bool FourEltRes = Op.getValueType() == MVT::v4f32;
8449 
  // Widen the (narrow) source to a full 128-bit vector.
8450  SDValue Wide = widenVec(DAG, Src, dl);
8451  EVT WideVT = Wide.getValueType();
8452  unsigned WideNumElts = WideVT.getVectorNumElements();
8453  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8454 
  // Default every mask index to the second shuffle operand (UNDEF or zero).
8455  SmallVector<int, 16> ShuffV;
8456  for (unsigned i = 0; i < WideNumElts; ++i)
8457  ShuffV.push_back(i + WideNumElts);
8458 
  // Place the source elements into the lane positions required by the
  // endianness: low part of each result lane on LE, high part on BE.
8459  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8460  int SaveElts = FourEltRes ? 4 : 2;
8461  if (Subtarget.isLittleEndian())
8462  for (int i = 0; i < SaveElts; i++)
8463  ShuffV[i * Stride] = i;
8464  else
8465  for (int i = 1; i <= SaveElts; i++)
8466  ShuffV[i * Stride - 1] = i - 1;
8467 
  // For unsigned conversions shuffle in zeros so the high bits are cleared.
8468  SDValue ShuffleSrc2 =
8469  SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8470  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8471 
8472  SDValue Extend;
8473  if (SignedConv) {
8474  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8475  EVT ExtVT = Src.getValueType();
8476  if (Subtarget.hasP9Altivec())
8477  ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8478  IntermediateVT.getVectorNumElements());
8479 
8480  Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8481  DAG.getValueType(ExtVT));
8482  } else
8483  Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8484 
  // Re-emit the conversion on the legal intermediate type (keeping the
  // chain for strict nodes).
8485  if (IsStrict)
8486  return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8487  {Op.getOperand(0), Extend}, Flags);
8488 
8489  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8490 }
8491 
// Lower [SU]INT_TO_FP (and strict variants) to f32/f64: dispatch vector
// conversions, direct moves, or store/load sequences (LFIWAX/LFIWZX/LFD)
// through a stack slot, reusing an existing load's address when possible.
// NOTE(review): this listing was extracted from rendered docs; several
// hyperlinked lines (e.g. 8582, 8599, 8602-8603, 8609, 8612-8613, 8628,
// 8638, 8642, 8645-8646, 8660, 8688, 8698, 8703, 8724, 8730, 8741) were
// lost in extraction — consult the original file for the full text.
8492 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8493  SelectionDAG &DAG) const {
8494  SDLoc dl(Op);
8495  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8496  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8497  bool IsStrict = Op->isStrictFPOpcode();
8498  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8499  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8500 
8501  // TODO: Any other flags to propagate?
8502  SDNodeFlags Flags;
8503  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8504 
  // Vector FP results marked Custom take the dedicated vector path.
8505  EVT InVT = Src.getValueType();
8506  EVT OutVT = Op.getValueType();
8507  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8508  isOperationCustom(Op.getOpcode(), InVT))
8509  return LowerINT_TO_FPVector(Op, DAG, dl);
8510 
8511  // Conversions to f128 are legal.
8512  if (Op.getValueType() == MVT::f128)
8513  return Subtarget.hasP9Vector() ? Op : SDValue();
8514 
8515  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8516  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8517  return SDValue();
8518 
  // i1 sources become a select between 1.0 and 0.0.
8519  if (Src.getValueType() == MVT::i1) {
8520  SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8521  DAG.getConstantFP(1.0, dl, Op.getValueType()),
8522  DAG.getConstantFP(0.0, dl, Op.getValueType()));
8523  if (IsStrict)
8524  return DAG.getMergeValues({Sel, Chain}, dl);
8525  else
8526  return Sel;
8527  }
8528 
8529  // If we have direct moves, we can do all the conversion, skip the store/load
8530  // however, without FPCVT we can't do most conversions.
8531  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8532  Subtarget.isPPC64() && Subtarget.hasFPCVT())
8533  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8534 
8535  assert((IsSigned || Subtarget.hasFPCVT()) &&
8536  "UINT_TO_FP is supported only with FPCVT");
8537 
8538  if (Src.getValueType() == MVT::i64) {
8539  SDValue SINT = Src;
8540  // When converting to single-precision, we actually need to convert
8541  // to double-precision first and then round to single-precision.
8542  // To avoid double-rounding effects during that operation, we have
8543  // to prepare the input operand. Bits that might be truncated when
8544  // converting to double-precision are replaced by a bit that won't
8545  // be lost at this stage, but is below the single-precision rounding
8546  // position.
8547  //
8548  // However, if -enable-unsafe-fp-math is in effect, accept double
8549  // rounding to avoid the extra overhead.
8550  if (Op.getValueType() == MVT::f32 &&
8551  !Subtarget.hasFPCVT() &&
8552  !DAG.getTarget().Options.UnsafeFPMath) {
8553 
8554  // Twiddle input to make sure the low 11 bits are zero. (If this
8555  // is the case, we are guaranteed the value will fit into the 53 bit
8556  // mantissa of an IEEE double-precision value without rounding.)
8557  // If any of those low 11 bits were not zero originally, make sure
8558  // bit 12 (value 2048) is set instead, so that the final rounding
8559  // to single-precision gets the correct result.
8560  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8561  SINT, DAG.getConstant(2047, dl, MVT::i64));
8562  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8563  Round, DAG.getConstant(2047, dl, MVT::i64));
8564  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8565  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8566  Round, DAG.getConstant(-2048, dl, MVT::i64));
8567 
8568  // However, we cannot use that value unconditionally: if the magnitude
8569  // of the input value is small, the bit-twiddling we did above might
8570  // end up visibly changing the output. Fortunately, in that case, we
8571  // don't need to twiddle bits since the original input will convert
8572  // exactly to double-precision floating-point already. Therefore,
8573  // construct a conditional to use the original value if the top 11
8574  // bits are all sign-bit copies, and use the rounded value computed
8575  // above otherwise.
8576  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8577  SINT, DAG.getConstant(53, dl, MVT::i32));
8578  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8579  Cond, DAG.getConstant(1, dl, MVT::i64));
8580  Cond = DAG.getSetCC(
8581  dl,
8583  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8584 
8585  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8586  }
8587 
8588  ReuseLoadInfo RLI;
8589  SDValue Bits;
8590 
  // Get the i64 bits into an FP register: reuse an existing load's address
  // when possible, otherwise spill through a stack slot, else bitcast.
8591  MachineFunction &MF = DAG.getMachineFunction();
8592  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8593  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8594  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8595  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8596  } else if (Subtarget.hasLFIWAX() &&
8597  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8598  MachineMemOperand *MMO =
8600  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8601  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8604  Ops, MVT::i32, MMO);
8605  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8606  } else if (Subtarget.hasFPCVT() &&
8607  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8608  MachineMemOperand *MMO =
8610  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8611  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8614  Ops, MVT::i32, MMO);
8615  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8616  } else if (((Subtarget.hasLFIWAX() &&
8617  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8618  (Subtarget.hasFPCVT() &&
8619  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8620  SINT.getOperand(0).getValueType() == MVT::i32) {
  // The value is an extended i32: store the narrow value and reload it
  // with the matching extending FP load (LFIWAX/LFIWZX).
8621  MachineFrameInfo &MFI = MF.getFrameInfo();
8622  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8623 
8624  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8625  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8626 
8627  SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8629  DAG.getMachineFunction(), FrameIdx));
8630  Chain = Store;
8631 
8632  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8633  "Expected an i32 store");
8634 
8635  RLI.Ptr = FIdx;
8636  RLI.Chain = Chain;
8637  RLI.MPI =
8639  RLI.Alignment = Align(4);
8640 
8641  MachineMemOperand *MMO =
8643  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8644  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8647  dl, DAG.getVTList(MVT::f64, MVT::Other),
8648  Ops, MVT::i32, MMO);
8649  Chain = Bits.getValue(1);
8650  } else
8651  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8652 
8653  SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8654  if (IsStrict)
8655  Chain = FP.getValue(1);
8656 
  // Without FPCVT, f32 results were computed in f64 and must be rounded.
8657  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8658  if (IsStrict)
8659  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8661  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8662  else
8663  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8664  DAG.getIntPtrConstant(0, dl));
8665  }
8666  return FP;
8667  }
8668 
8669  assert(Src.getValueType() == MVT::i32 &&
8670  "Unhandled INT_TO_FP type in custom expander!");
8671  // Since we only generate this in 64-bit mode, we can take advantage of
8672  // 64-bit registers. In particular, sign extend the input value into the
8673  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8674  // then lfd it and fcfid it.
8675  MachineFunction &MF = DAG.getMachineFunction();
8676  MachineFrameInfo &MFI = MF.getFrameInfo();
8677  EVT PtrVT = getPointerTy(MF.getDataLayout());
8678 
8679  SDValue Ld;
8680  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8681  ReuseLoadInfo RLI;
8682  bool ReusingLoad;
  // If the i32 didn't come from a reusable load, spill it to the stack
  // so it can be picked up by LFIWAX/LFIWZX.
8683  if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8684  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8685  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8686 
8687  SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8689  DAG.getMachineFunction(), FrameIdx));
8690  Chain = Store;
8691 
8692  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8693  "Expected an i32 store");
8694 
8695  RLI.Ptr = FIdx;
8696  RLI.Chain = Chain;
8697  RLI.MPI =
8699  RLI.Alignment = Align(4);
8700  }
8701 
8702  MachineMemOperand *MMO =
8704  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8705  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8706  Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8707  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8708  MVT::i32, MMO);
8709  Chain = Ld.getValue(1);
8710  if (ReusingLoad)
8711  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8712  } else {
8713  assert(Subtarget.isPPC64() &&
8714  "i32->FP without LFIWAX supported only on PPC64");
8715 
8716  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8717  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8718 
8719  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8720 
8721  // STD the extended value into the stack slot.
8722  SDValue Store = DAG.getStore(
8723  Chain, dl, Ext64, FIdx,
8725  Chain = Store;
8726 
8727  // Load the value as a double.
8728  Ld = DAG.getLoad(
8729  MVT::f64, dl, Chain, FIdx,
8731  Chain = Ld.getValue(1);
8732  }
8733 
8734  // FCFID it and return it.
8735  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8736  if (IsStrict)
8737  Chain = FP.getValue(1);
8738  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8739  if (IsStrict)
8740  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8742  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8743  else
8744  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8745  DAG.getIntPtrConstant(0, dl));
8746  }
8747  return FP;
8748 }
8749 
// Lower FLT_ROUNDS_: read the FPSCR with MFFS and remap PPC's rounding-mode
// encoding to the FLT_ROUNDS encoding with bit arithmetic (see table below).
// NOTE(review): file lines 8792 (a big-endian assert) and 8816 (the final
// truncate/extend node call) were lost in extraction.
8750 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8751  SelectionDAG &DAG) const {
8752  SDLoc dl(Op);
8753  /*
8754  The rounding mode is in bits 30:31 of FPSR, and has the following
8755  settings:
8756  00 Round to nearest
8757  01 Round to 0
8758  10 Round to +inf
8759  11 Round to -inf
8760 
8761  FLT_ROUNDS, on the other hand, expects the following:
8762  -1 Undefined
8763  0 Round to 0
8764  1 Round to nearest
8765  2 Round to +inf
8766  3 Round to -inf
8767 
8768  To perform the conversion, we do:
8769  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8770  */
8771 
8772  MachineFunction &MF = DAG.getMachineFunction();
8773  EVT VT = Op.getValueType();
8774  EVT PtrVT = getPointerTy(MF.getDataLayout());
8775 
8776  // Save FP Control Word to register
8777  SDValue Chain = Op.getOperand(0);
8778  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8779  Chain = MFFS.getValue(1);
8780 
8781  SDValue CWD;
  // With legal i64, extract the low word directly via bitcast + truncate.
8782  if (isTypeLegal(MVT::i64)) {
8783  CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8784  DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8785  } else {
8786  // Save FP register to stack slot
8787  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8788  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8789  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8790 
8791  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the assert's condition (line 8792) was lost here.
8793  "Stack slot adjustment is valid only on big endian subtargets!");
8794  SDValue Four = DAG.getConstant(4, dl, PtrVT);
8795  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8796  CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8797  Chain = CWD.getValue(1);
8798  }
8799 
8800  // Transform as necessary
8801  SDValue CWD1 =
8802  DAG.getNode(ISD::AND, dl, MVT::i32,
8803  CWD, DAG.getConstant(3, dl, MVT::i32));
8804  SDValue CWD2 =
8805  DAG.getNode(ISD::SRL, dl, MVT::i32,
8806  DAG.getNode(ISD::AND, dl, MVT::i32,
8807  DAG.getNode(ISD::XOR, dl, MVT::i32,
8808  CWD, DAG.getConstant(3, dl, MVT::i32)),
8809  DAG.getConstant(3, dl, MVT::i32)),
8810  DAG.getConstant(1, dl, MVT::i32));
8811 
8812  SDValue RetVal =
8813  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8814 
  // Adjust the i32 result to the requested result type VT.
8815  RetVal =
8817  dl, VT, RetVal);
8818 
8819  return DAG.getMergeValues({RetVal, Chain}, dl);
8820 }
8821 
8822 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8823  EVT VT = Op.getValueType();
8824  unsigned BitWidth = VT.getSizeInBits();
8825  SDLoc dl(Op);
8826  assert(Op.getNumOperands() == 3 &&
8827  VT == Op.getOperand(1).getValueType() &&
8828  "Unexpected SHL!");
8829 
8830  // Expand into a bunch of logical ops. Note that these ops
8831  // depend on the PPC behavior for oversized shift amounts.
8832  SDValue Lo = Op.getOperand(0);
8833  SDValue Hi = Op.getOperand(1);
8834  SDValue Amt = Op.getOperand(2);
8835  EVT AmtVT = Amt.getValueType();
8836 
8837  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8838  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8839  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8840  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8841  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8842  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8843  DAG.getConstant(-BitWidth, dl, AmtVT));
8844  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8845  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8846  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8847  SDValue OutOps[] = { OutLo, OutHi };
8848  return DAG.getMergeValues(OutOps, dl);
8849 }
8850 
8851 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8852  EVT VT = Op.getValueType();
8853  SDLoc dl(Op);
8854  unsigned BitWidth = VT.getSizeInBits();
8855  assert(Op.getNumOperands() == 3 &&
8856  VT == Op.getOperand(1).getValueType() &&
8857  "Unexpected SRL!");
8858 
8859  // Expand into a bunch of logical ops. Note that these ops
8860  // depend on the PPC behavior for oversized shift amounts.
8861  SDValue Lo = Op.getOperand(0);
8862  SDValue Hi = Op.getOperand(1);
8863  SDValue Amt = Op.getOperand(2);
8864  EVT AmtVT = Amt.getValueType();
8865 
8866  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8867  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8868  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8869  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8870  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8871  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8872  DAG.getConstant(-BitWidth, dl, AmtVT));
8873  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8874  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8875  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8876  SDValue OutOps[] = { OutLo, OutHi };
8877  return DAG.getMergeValues(OutOps, dl);
8878 }
8879 
8880 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8881  SDLoc dl(Op);
8882  EVT VT = Op.getValueType();
8883  unsigned BitWidth = VT.getSizeInBits();
8884  assert(Op.getNumOperands() == 3 &&
8885  VT == Op.getOperand(1).getValueType() &&
8886  "Unexpected SRA!");
8887 
8888  // Expand into a bunch of logical ops, followed by a select_cc.
8889  SDValue Lo = Op.getOperand(0);
8890  SDValue Hi = Op.getOperand(1);
8891  SDValue Amt = Op.getOperand(2);
8892  EVT AmtVT = Amt.getValueType();
8893 
8894  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8895  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8896  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8897  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8898  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8899  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8900  DAG.getConstant(-BitWidth, dl, AmtVT));
8901  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8902  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8903  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8904  Tmp4, Tmp6, ISD::SETLE);
8905  SDValue OutOps[] = { OutLo, OutHi };
8906  return DAG.getMergeValues(OutOps, dl);
8907 }
8908 
8909 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8910  SelectionDAG &DAG) const {
8911  SDLoc dl(Op);
8912  EVT VT = Op.getValueType();
8913  unsigned BitWidth = VT.getSizeInBits();
8914 
8915  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8916  SDValue X = Op.getOperand(0);
8917  SDValue Y = Op.getOperand(1);
8918  SDValue Z = Op.getOperand(2);
8919  EVT AmtVT = Z.getValueType();
8920 
8921  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8922  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8923  // This is simpler than TargetLowering::expandFunnelShift because we can rely
8924  // on PowerPC shift by BW being well defined.
8925  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8926  DAG.getConstant(BitWidth - 1, dl, AmtVT));
8927  SDValue SubZ =
8928  DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8929  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8930  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8931  return DAG.getNode(ISD::OR, dl, VT, X, Y);
8932 }
8933 
8934 //===----------------------------------------------------------------------===//
8935 // Vector related lowering.
8936 //
8937 
8938 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8939 /// element size of SplatSize. Cast the result to VT.
8940 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8941  SelectionDAG &DAG, const SDLoc &dl) {
8942  static const MVT VTys[] = { // canonical VT to use for each size.
  // NOTE(review): the array initializer line (file line 8943, listing the
  // canonical vector MVTs indexed by SplatSize-1) was lost in extraction.
8944  };
8945 
  // MVT::Other means "no preference": use the canonical type for SplatSize.
8946  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8947 
8948  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8949  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8950  SplatSize = 1;
8951  Val = 0xFF;
8952  }
8953 
8954  EVT CanonicalVT = VTys[SplatSize-1];
8955 
8956  // Build a canonical splat for this value.
8957  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8958 }
8959 
8960 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8961 /// specified intrinsic ID.
8962 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8963  const SDLoc &dl, EVT DestVT = MVT::Other) {
8964  if (DestVT == MVT::Other) DestVT = Op.getValueType();
8965  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8966  DAG.getConstant(IID, dl, MVT::i32), Op);
8967 }
8968 
8969 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8970 /// specified intrinsic ID.
  // NOTE(review): the opening signature line (file line 8971, declaring
  // `static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,`)
  // was lost in extraction.
8972  SelectionDAG &DAG, const SDLoc &dl,
8973  EVT DestVT = MVT::Other) {
  // MVT::Other means "use the left operand's type" for the result.
8974  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8975  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8976  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8977 }
8978 
8979 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8980 /// specified intrinsic ID.
8981 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8982  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8983  EVT DestVT = MVT::Other) {
8984  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8985  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8986  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8987 }
8988 
8989 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8990 /// amount. The result has the specified value type.
8991 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8992  SelectionDAG &DAG, const SDLoc &dl) {
8993  // Force LHS/RHS to be the right type.
8994  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8995  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8996 
8997  int Ops[16];
8998  for (unsigned i = 0; i != 16; ++i)
8999  Ops[i] = i + Amt;
9000  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9001  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9002 }
9003 
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  // Only these vector types have BUILD_VECTOR patterns worth keeping
  // (v4f32 needs P8 vector support, integer types need direct moves).
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  // Expand (return false) only for load-and-splat; keep the BUILD_VECTOR
  // in every other case.
  return !(IsSplat && IsLoad);
}
9056 
9057 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9058 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9059 
9060  SDLoc dl(Op);
9061  SDValue Op0 = Op->getOperand(0);
9062 
9063  if ((Op.getValueType() != MVT::f128) ||
9064  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9065  (Op0.getOperand(0).getValueType() != MVT::i64) ||
9066  (Op0.getOperand(1).getValueType() != MVT::i64))
9067  return SDValue();
9068 
9069  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9070  Op0.getOperand(1));
9071 }
9072 
9073 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9074  const SDValue *InputLoad = &Op;
9075  while (InputLoad->getOpcode() == ISD::BITCAST)
9076  InputLoad = &InputLoad->getOperand(0);
9077  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9078  InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9079  IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9080  InputLoad = &InputLoad->getOperand(0);
9081  }
9082  if (InputLoad->getOpcode() != ISD::LOAD)
9083  return nullptr;
9084  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9085  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9086 }
9087 
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and the
// resulting number is not a denormal number. Return true if successful.
// On success, ArgAPFloat is updated in place with the converted value;
// otherwise it is left untouched.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                              &LosesInfo);
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}
9101 
// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a denormal
// number. Return true if successful.
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  // Delegate to the APFloat-based helper; it only commits on success, so
  // ArgAPInt is updated only when the round-trip is lossless.
  bool Success = convertToNonDenormSingle(APFloatDp);
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}
9115 
// Nondestructive check for convertToNonDenormSingle: reports whether the
// value would convert to single precision without losing information and
// without producing a denormal, but never modifies the argument.
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
                              &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}
9127 
// Returns true if Op (a BUILD_VECTOR) is fed by a load that can be turned
// into a single load-and-splat instruction. For i32 -> v2i64 extending
// splats, the Opcode out-parameter is updated to the matching extending
// splat opcode (see the caller in LowerBUILD_VECTOR).
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                             unsigned &Opcode) {
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
  // Load-and-splat requires VSX and an unindexed load.
  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
    return false;

  EVT Ty = Op->getValueType(0);
  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
  // as we cannot handle extending loads for these types.
  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
      ISD::isNON_EXTLoad(InputNode))
    return true;

  EVT MemVT = InputNode->getMemoryVT();
  // For v8i16 and v16i8 types, extending loads can be handled as long as the
  // memory VT is the same vector element VT type.
  // The loads feeding into the v8i16 and v16i8 types will be extending because
  // scalar i8/i16 are not legal types.
  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
      (MemVT == Ty.getVectorElementType()))
    return true;

  if (Ty == MVT::v2i64) {
    // Check the extend type, when the input type is i32, and the output vector
    // type is v2i64.
    // NOTE(review): the statements assigning the zero/sign-extending splat
    // opcode to Opcode appear elided in this view — confirm against the full
    // source.
    if (MemVT == MVT::i32) {
      if (ISD::isZEXTLoad(InputNode))
      if (ISD::isSEXTLoad(InputNode))
    }
    return true;
  }
  return false;
}
9163 
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(0) == MVT::v2f64) &&
        convertToNonDenormSingle(APSplatBits)) {
      // NOTE(review): the opcode/type arguments of this getNode call appear
      // elided in this view — confirm against the full source.
      SDValue SplatNode = DAG.getNode(
          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
      return DAG.getBitcast(Op.getValueType(), SplatNode);
    } else {
      // We may lose precision, so we have to use XXSPLTI32DX.

      // Split the 64-bit splat constant into its two 32-bit halves; each
      // half is materialized by a separate XXSPLTI32DX.
      uint32_t Hi =
          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
      uint32_t Lo =
          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);

      if (!Hi || !Lo)
        // If either load is 0, then we should generate XXLXOR to set to 0.
        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);

      if (Hi)
        SplatNode = DAG.getNode(
            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
            DAG.getTargetConstant(0, dl, MVT::i32),
            DAG.getTargetConstant(Hi, dl, MVT::i32));

      if (Lo)
        SplatNode =
            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
                        DAG.getTargetConstant(1, dl, MVT::i32),
                        DAG.getTargetConstant(Lo, dl, MVT::i32));

      return DAG.getBitcast(Op.getValueType(), SplatNode);
    }
  }

  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(Op, true) &&
        isValidSplatLoad(Subtarget, Op, NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
      // 15", but function isValidSplatLoad() now will only return true when
      // the data at index 0 is not nullptr. So we will not get into trouble for
      // these cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extend to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
            LD->getMemoryVT(), LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
                                      LdSplt.getValue(1));
        return LdSplt;
      }
    }

    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
    // 32-bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    // NOTE(review): part of this condition (the efficient-pattern check)
    // appears elided in this view — confirm against the full source.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  uint64_t SplatBits = APSplatBits.getZExtValue();
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
9464 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the table entry: bits [26,30) hold the operation, bits [13,26)
  // and [0,13) hold the table indices of the two recursive sub-shuffles.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // IDs are base-9 encodings of the four lane selectors: <0,1,2,3> is the
    // unmodified LHS, <4,5,6,7> the unmodified RHS.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize both sub-shuffles before combining them.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Build the byte-level mask corresponding to the selected word operation.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Emit the shuffle on v16i8 and cast back to the original type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9541 
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9642 
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Reference masks spelling out the identity order of each operand's
  // half-words, one 4-bit nibble per element.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9754 
9755 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9756 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9757 /// return the default SDValue.
9758 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9759  SelectionDAG &DAG) const {
9760  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9761  // to v16i8. Peek through the bitcasts to get the actual operands.
9764 
9765  auto ShuffleMask = SVN->getMask();
9766  SDValue VecShuffle(SVN, 0);
9767  SDLoc DL(SVN);
9768 
9769  // Check that we have a four byte shuffle.
9770  if (!isNByteElemShuffleMask(SVN, 4, 1))
9771  return SDValue();
9772 
9773  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9774  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9775  std::swap(LHS, RHS);
9777  ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9778  }
9779 
9780  // Ensure that the RHS is a vector of constants.
9781  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9782  if (!BVN)
9783  return SDValue();
9784 
9785  // Check if RHS is a splat of 4-bytes (or smaller).
9786  APInt APSplatValue, APSplatUndef;
9787  unsigned SplatBitSize;
9788  bool HasAnyUndefs;
9789  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9790  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9791  SplatBitSize > 32)
9792  return SDValue();
9793 
9794  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9795  // The instruction splats a constant C into two words of the source vector
9796  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9797  // Thus we check that the shuffle mask is the equivalent of
9798  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9799  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9800  // within each word are consecutive, so we only need to check the first byte.
9801  SDValue Index;
9802  bool IsLE = Subtarget.isLittleEndian();
9803  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9804  (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9805  ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9806  Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9807  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9808  (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9809  ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9810  Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9811  else
9812  return SDValue();
9813 
9814  // If the splat is narrower than 32-bits, we need to get the 32-bit value
9815  // for XXSPLTI32DX.
9816  unsigned SplatVal = APSplatValue.getZExtValue();
9817  for (; SplatBitSize < 32; SplatBitSize <<= 1)
9818  SplatVal |= (SplatVal << SplatBitSize);
9819 
9820  SDValue SplatNode = DAG.getNode(
9822  Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9823  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9824 }
9825 
9826 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9827 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9828 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9829 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9830 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9831  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9832  assert(Op.getValueType() == MVT::v1i128 &&
9833  "Only set v1i128 as custom, other type shouldn't reach here!");
9834  SDLoc dl(Op);
9835  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9836  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9837  unsigned SHLAmt = N1.getConstantOperandVal(0);
9838  if (SHLAmt % 8 == 0) {
9839  std::array<int, 16> Mask;
9840  std::iota(Mask.begin(), Mask.end(), 0);
9841  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9842  if (SDValue Shuffle =
9844  DAG.getUNDEF(MVT::v16i8), Mask))
9845  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9846  }
9847  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9848  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9849  DAG.getConstant(SHLAmt, dl, MVT::i32));
9850  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9851  DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9852  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9853  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9854 }
9855 
9856 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9857 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
9858 /// return the code it can be lowered into. Worst case, it can always be
9859 /// lowered into a vperm.
9860 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9861  SelectionDAG &DAG) const {
9862  SDLoc dl(Op);
9863  SDValue V1 = Op.getOperand(0);
9864  SDValue V2 = Op.getOperand(1);
9865  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9866 
9867  // Any nodes that were combined in the target-independent combiner prior
9868  // to vector legalization will not be sent to the target combine. Try to
9869  // combine it here.
9870  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9871  if (!isa<ShuffleVectorSDNode>(NewShuffle))
9872  return NewShuffle;
9873  Op = NewShuffle;
9874  SVOp = cast<ShuffleVectorSDNode>(Op);
9875  V1 = Op.getOperand(0);
9876  V2 = Op.getOperand(1);
9877  }
9878  EVT VT = Op.getValueType();
9879  bool isLittleEndian = Subtarget.isLittleEndian();
9880 
9881  unsigned ShiftElts, InsertAtByte;
9882  bool Swap = false;
9883 
9884  // If this is a load-and-splat, we can do that with a single instruction
9885  // in some cases. However if the load has multiple uses, we don't want to
9886  // combine it because that will just produce multiple loads.
9887  bool IsPermutedLoad = false;
9888  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9889  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9890  (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9891  InputLoad->hasOneUse()) {
9892  bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9893  int SplatIdx =
9894  PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9895 
9896  // The splat index for permuted loads will be in the left half of the vector
9897  // which is strictly wider than the loaded value by 8 bytes. So we need to
9898  // adjust the splat index to point to the correct address in memory.
9899  if (IsPermutedLoad) {
9900  assert((isLittleEndian || IsFourByte) &&
9901  "Unexpected size for permuted load on big endian target");
9902  SplatIdx += IsFourByte ? 2 : 1;
9903  assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9904  "Splat of a value outside of the loaded memory");
9905  }
9906 
9907  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9908  // For 4-byte load-and-splat, we need Power9.
9909  if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9910  uint64_t Offset = 0;
9911  if (IsFourByte)
9912  Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9913  else
9914  Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9915 
9916  // If the width of the load is the same as the width of the splat,
9917  // loading with an offset would load the wrong memory.
9918  if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9919  Offset = 0;
9920 
9921  SDValue BasePtr = LD->getBasePtr();
9922  if (Offset != 0)
9924  BasePtr, DAG.getIntPtrConstant(Offset, dl));
9925  SDValue Ops[] = {
9926  LD->getChain(), // Chain
9927  BasePtr, // BasePtr
9928  DAG.getValueType(Op.getValueType()) // VT
9929  };
9930  SDVTList VTL =
9931  DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9932  SDValue LdSplt =
9934  Ops, LD->getMemoryVT(), LD->getMemOperand());
9935  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9936  if (LdSplt.getValueType() != SVOp->getValueType(0))
9937  LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9938  return LdSplt;
9939  }
9940  }
9941 
9942  // All v2i64 and v2f64 shuffles are legal
9943  if (VT == MVT::v2i64 || VT == MVT::v2f64)
9944  return Op;
9945 
9946  if (Subtarget.hasP9Vector() &&
9947  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9948  isLittleEndian)) {
9949  if (Swap)
9950  std::swap(V1, V2);
9951  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9952  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9953  if (ShiftElts) {
9954  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9955  DAG.getConstant(ShiftElts, dl, MVT::i32));
9956  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9957  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9958  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9959  }
9960  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9961  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9962  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9963  }
9964 
9965  if (Subtarget.hasPrefixInstrs()) {
9966  SDValue SplatInsertNode;
9967  if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9968  return SplatInsertNode;
9969  }
9970 
9971  if (Subtarget.hasP9Altivec()) {
9972  SDValue NewISDNode;
9973  if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9974  return NewISDNode;
9975 
9976  if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9977  return NewISDNode;
9978  }
9979 
9980  if (Subtarget.hasVSX() &&
9981  PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9982  if (Swap)
9983  std::swap(V1, V2);
9984  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9985  SDValue Conv2 =
9986  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9987 
9988  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9989  DAG.getConstant(ShiftElts, dl, MVT::i32));
9990  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9991  }
9992 
9993  if (Subtarget.hasVSX() &&
9994  PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9995  if (Swap)
9996  std::swap(V1, V2);
9997  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9998  SDValue Conv2 =
9999  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10000 
10001  SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10002  DAG.getConstant(ShiftElts, dl, MVT::i32));
10003  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10004  }
10005 
10006  if (Subtarget.hasP9Vector()) {
10007  if (PPC::isXXBRHShuffleMask(SVOp)) {
10008  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10009  SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10010  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10011  } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10012  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10013  SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10014  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10015  } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10016  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10017  SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10018  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10019  } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10020  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10021  SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10022  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10023  }
10024  }
10025 
10026  if (Subtarget.hasVSX()) {
10027  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10028  int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10029 
10030  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10031  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10032  DAG.getConstant(SplatIdx, dl, MVT::i32));
10033  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10034  }
10035 
10036  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10037  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10038  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10039  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10040  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10041  }
10042  }
10043 
10044  // Cases that are handled by instructions that take permute immediates
10045  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10046  // selected by the instruction selector.
10047  if (V2.isUndef()) {
10048  if (PPC::isSplatShuffleMask(SVOp, 1) ||
10049  PPC::isSplatShuffleMask(SVOp, 2) ||
10050  PPC::isSplatShuffleMask(SVOp, 4) ||
10051  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10052  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10053  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10054  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10055  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10056  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10057  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10058  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10059  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10060  (Subtarget.hasP8Altivec() && (
10061  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10062  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10063  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10064  return Op;
10065  }
10066  }
10067 
10068  // Altivec has a variety of "shuffle immediates" that take two vector inputs
10069  // and produce a fixed permutation. If any of these match, do not lower to
10070  // VPERM.
10071  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10072  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10073  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10074  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10075  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10076  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10077  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10078  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10079  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10080  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10081  (Subtarget.hasP8Altivec() && (
10082  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10083  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10084  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10085  return Op;
10086 
10087  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10088  // perfect shuffle table to emit an optimal matching sequence.
10089  ArrayRef<int> PermMask = SVOp->getMask();
10090 
10091  if (!DisablePerfectShuffle && !isLittleEndian) {
10092  unsigned PFIndexes[4];
10093  bool isFourElementShuffle = true;
10094  for (unsigned i = 0; i != 4 && isFourElementShuffle;
10095  ++i) { // Element number
10096  unsigned EltNo = 8; // Start out undef.
10097  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10098  if (PermMask[i * 4 + j] < 0)
10099  continue; // Undef, ignore it.
10100 
10101  unsigned ByteSource = PermMask[i * 4 + j];
10102  if ((ByteSource & 3) != j) {
10103  isFourElementShuffle = false;
10104  break;
10105  }
10106 
10107  if (EltNo == 8) {
10108  EltNo = ByteSource / 4;
10109  } else if (EltNo != ByteSource / 4) {
10110  isFourElementShuffle = false;
10111  break;
10112  }
10113  }
10114  PFIndexes[i] = EltNo;
10115  }
10116 
10117  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10118  // perfect shuffle vector to determine if it is cost effective to do this as
10119  // discrete instructions, or whether we should use a vperm.
10120  // For now, we skip this for little endian until such time as we have a
10121  // little-endian perfect shuffle table.
10122  if (isFourElementShuffle) {
10123  // Compute the index in the perfect shuffle table.
10124  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10125  PFIndexes[2] * 9 + PFIndexes[3];
10126 
10127  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10128  unsigned Cost = (PFEntry >> 30);
10129 
10130  // Determining when to avoid vperm is tricky. Many things affect the cost
10131  // of vperm, particularly how many times the perm mask needs to be
10132  // computed. For example, if the perm mask can be hoisted out of a loop or
10133  // is already used (perhaps because there are multiple permutes with the
10134  // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10135  // permute mask out of the loop requires an extra register.
10136  //
10137  // As a compromise, we only emit discrete instructions if the shuffle can
10138  // be generated in 3 or fewer operations. When we have loop information
10139  // available, if this block is within a loop, we should avoid using vperm
10140  // for 3-operation perms and use a constant pool load instead.
10141  if (Cost < 3)
10142  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10143  }
10144  }
10145 
10146  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10147  // vector that will get spilled to the constant pool.
10148  if (V2.isUndef()) V2 = V1;
10149 
10150  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10151  // that it is in input element units, not in bytes. Convert now.
10152 
10153  // For little endian, the order of the input vectors is reversed, and
10154  // the permutation mask is complemented with respect to 31. This is
10155  // necessary to produce proper semantics with the big-endian-biased vperm
10156  // instruction.
10157  EVT EltVT = V1.getValueType().getVectorElementType();
10158  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10159 
10160  SmallVector<SDValue, 16> ResultMask;
10161  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10162  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10163 
10164  for (unsigned j = 0; j != BytesPerElement; ++j)
10165  if (isLittleEndian)
10166  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10167  dl, MVT::i32));
10168  else
10169  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10170  MVT::i32));
10171  }
10172 
10173  ShufflesHandledWithVPERM++;
10174  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10175  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10176  LLVM_DEBUG(SVOp->dump());
10177  LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10178  LLVM_DEBUG(VPermMask.dump());
10179 
10180  if (isLittleEndian)
10181  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10182  V2, V1, VPermMask);
10183  else
10184  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10185  V1, V2, VPermMask);
10186 }
10187 
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
///
/// \param Intrin      INTRINSIC_WO_CHAIN node; operand 0 is the intrinsic ID.
/// \param CompareOpc  [out] numeric opcode constant passed to the VCMP/VCMPo
///                    node (-1 when the intrinsic is not a comparison).
/// \param isDot       [out] true for the "record" (predicate, *_p) forms that
///                    also set CR6.
/// \param Subtarget   used to reject forms the target ISA does not provide.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword-element equality predicate: only available with VSX or
  // Power8 Altivec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // Not-equal / not-equal-or-zero predicates require Power9 Altivec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  // Signed doubleword greater-than predicate: VSX or Power8 Altivec only.
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  // Unsigned doubleword greater-than predicate: VSX or Power8 Altivec only.
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword comparisons (non-record forms) require ISA 3.1 (Power10).
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  // Note: unlike the *_p form above, the non-record doubleword compares
  // only check hasP8Altivec (not hasVSX).
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword comparison predicates (record forms) require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
10474 
10475 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10476 /// lower, do it, otherwise return null.
10477 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10478  SelectionDAG &DAG) const {
10479  unsigned IntrinsicID =
10480  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10481 
10482  SDLoc dl(Op);
10483 
10484  switch (IntrinsicID) {
10485  case Intrinsic::thread_pointer:
10486  // Reads the thread pointer register, used for __builtin_thread_pointer.
10487  if (Subtarget.isPPC64())
10488  return DAG.getRegister(PPC::X13, MVT::i64);
10489  return DAG.getRegister(PPC::R2, MVT::i32);
10490 
10491  case Intrinsic::ppc_mma_disassemble_acc:
10492  case Intrinsic::ppc_vsx_disassemble_pair: {
10493  int NumVecs = 2;
10494  SDValue WideVec = Op.getOperand(1);
10495  if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10496  NumVecs = 4;
10497  WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10498  }
10499  SmallVector<SDValue, 4> RetOps;
10500  for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10501  SDValue Extract = DAG.getNode(
10502  PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10503  DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10504  : VecNo,
10505  dl, getPointerTy(DAG.getDataLayout())));
10506  RetOps.push_back(Extract);
10507  }
10508  return DAG.getMergeValues(RetOps, dl);
10509  }
10510 
10511  case Intrinsic::ppc_unpack_longdouble: {
10512  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10513  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10514  "Argument of long double unpack must be 0 or 1!");
10515  return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10516  DAG.getConstant(!!(Idx->getSExtValue()), dl,
10517  Idx->getValueType(0)));
10518  }
10519 
10520  case Intrinsic::ppc_compare_exp_lt:
10521  case Intrinsic::ppc_compare_exp_gt:
10522  case Intrinsic::ppc_compare_exp_eq:
10523  case Intrinsic::ppc_compare_exp_uo: {
10524  unsigned Pred;
10525  switch (IntrinsicID) {
10526  case Intrinsic::ppc_compare_exp_lt:
10527  Pred = PPC::PRED_LT;
10528  break;
10529  case Intrinsic::ppc_compare_exp_gt:
10530  Pred = PPC::PRED_GT;
10531  break;
10532  case Intrinsic::ppc_compare_exp_eq:
10533  Pred = PPC::PRED_EQ;
10534  break;
10535  case Intrinsic::ppc_compare_exp_uo:
10536  Pred = PPC::PRED_UN;
10537  break;
10538  }
10539  return SDValue(
10540  DAG.getMachineNode(
10541  PPC::SELECT_CC_I4, dl, MVT::i32,
10542  {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10543  Op.getOperand(1), Op.getOperand(2)),
10544  0),
10545  DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10546  DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10547  0);
10548  }
10549  case Intrinsic::ppc_test_data_class_d:
10550  case Intrinsic::ppc_test_data_class_f: {
10551  unsigned CmprOpc = PPC::XSTSTDCDP;
10552  if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
10553  CmprOpc = PPC::XSTSTDCSP;
10554  return SDValue(
10555  DAG.getMachineNode(
10556  PPC::SELECT_CC_I4, dl, MVT::i32,
10557  {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10558  Op.getOperand(1)),
10559  0),
10560  DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10561  DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10562  0);
10563  }
10564  case Intrinsic::ppc_fnmsub: {
10565  EVT VT = Op.getOperand(1).getValueType();
10566  if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10567  return DAG.getNode(
10568  ISD::FNEG, dl, VT,
10569  DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10570  DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10571  return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10572  Op.getOperand(2), Op.getOperand(3));
10573  }
10574  case Intrinsic::ppc_convert_f128_to_ppcf128:
10575  case Intrinsic::ppc_convert_ppcf128_to_f128: {
10576  RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10577  ? RTLIB::CONVERT_PPCF128_F128
10578  : RTLIB::CONVERT_F128_PPCF128;
10579  MakeLibCallOptions CallOptions;
10580  std::pair<SDValue, SDValue> Result =
10581  makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10582  dl, SDValue());
10583  return Result.first;
10584  }
10585  case Intrinsic::ppc_maxfe:
10586  case Intrinsic::ppc_maxfl:
10587  case Intrinsic::ppc_maxfs:
10588  case Intrinsic::ppc_minfe:
10589  case Intrinsic::ppc_minfl:
10590  case Intrinsic::ppc_minfs: {
10591  EVT VT = Op.getValueType();
10592  assert(
10593  all_of(Op->ops().drop_front(4),
10594  [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10595  "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10596  (void)VT;
10598  if (IntrinsicID == Intrinsic::ppc_minfe ||
10599  IntrinsicID == Intrinsic::ppc_minfl ||
10600  IntrinsicID == Intrinsic::ppc_minfs)
10601  CC = ISD::SETLT;
10602  unsigned I = Op.getNumOperands() - 2, Cnt = I;
10603  SDValue Res = Op.getOperand(I);
10604  for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10605  Res =
10606  DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10607  }
10608  return Res;
10609  }
10610  }
10611 
10612  // If this is a lowered altivec predicate compare, CompareOpc is set to the
10613  // opcode number of the comparison.
10614  int CompareOpc;
10615  bool isDot;
10616  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10617  return SDValue(); // Don't custom lower most intrinsics.
10618 
10619  // If this is a non-dot comparison, make the VCMP node and we are done.
10620  if (!isDot) {
10621  SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10622  Op.getOperand(1), Op.getOperand(2),
10623  DAG.getConstant(CompareOpc, dl, MVT::i32));
10624  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10625  }
10626 
10627  // Create the PPCISD altivec 'dot' comparison node.
10628  SDValue Ops[] = {
10629  Op.getOperand(2), // LHS
10630  Op.getOperand(3), // RHS
10631  DAG.getConstant(CompareOpc, dl, MVT::i32)
10632  };
10633  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10634  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10635 
10636  // Now that we have the comparison, emit a copy from the CR to a GPR.
10637  // This is flagged to the above dot comparison.
10638  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10639  DAG.getRegister(PPC::CR6, MVT::i32),
10640  CompNode.getValue(1));
10641 
10642  // Unpack the result based on how the target uses it.
10643  unsigned BitNo; // Bit # of CR6.
10644  bool InvertBit; // Invert result?
10645  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10646  default: // Can't happen, don't crash on invalid number though.
10647  case 0: // Return the value of the EQ bit of CR6.
10648  BitNo = 0; InvertBit = false;
10649  break;
10650  case 1: // Return the inverted value of the EQ bit of CR6.
10651  BitNo = 0; InvertBit = true;
10652  break;
10653  case 2: // Return the value of the LT bit of CR6.
10654  BitNo = 2; InvertBit = false;
10655  break;
10656  case 3: // Return the inverted value of the LT bit of CR6.
10657  BitNo = 2; InvertBit = true;
10658  break;
10659  }
10660 
10661  // Shift the bit into the low position.
10662  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10663  DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10664  // Isolate the bit.
10665  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10666  DAG.getConstant(1, dl, MVT::i32));
10667 
10668  // If we are supposed to, toggle the bit.
10669  if (InvertBit)
10670  Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10671  DAG.getConstant(1, dl, MVT::i32));
10672  return Flags;
10673 }
10674 
10675 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10676  SelectionDAG &DAG) const {
10677  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10678  // the beginning of the argument list.
10679  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10680  SDLoc DL(Op);
10681  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10682  case Intrinsic::ppc_cfence: {
10683  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10684  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10685  SDValue Val = Op.getOperand(ArgStart + 1);
10686  EVT Ty = Val.getValueType();
10687  if (Ty == MVT::i128) {
10688  // FIXME: Testing one of two paired registers is sufficient to guarantee
10689  // ordering?
10690  Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10691  }
10692  return SDValue(
10693  DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10694  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10695  Op.getOperand(0)),
10696  0);
10697  }
10698  default:
10699  break;
10700  }
10701  return SDValue();
10702 }
10703 
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // This vector-register round trip is only done on 64-bit targets; keep the
  // generic lowering otherwise.
  if (!Subtarget.isPPC64())
    return Op;
  // MTVSRDD: splat the scalar into both doublewords of a v2i64.
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  // XXBRD: byte-reverse each doubleword of the vector.
  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
  // MFVSRD: move one swapped doubleword back to a GPR; which element holds it
  // depends on endianness.
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}
10722 
10723 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10724 // compared to a value that is atomically loaded (atomic loads zero-extend).
10725 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10726  SelectionDAG &DAG) const {
10727  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10728  "Expecting an atomic compare-and-swap here.");
10729  SDLoc dl(Op);
10730  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10731  EVT MemVT = AtomicNode->getMemoryVT();
10732  if (MemVT.getSizeInBits() >= 32)
10733  return Op;
10734 
10735  SDValue CmpOp = Op.getOperand(2);
10736  // If this is already correctly zero-extended, leave it alone.
10737  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10738  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10739  return Op;
10740 
10741  // Clear the high bits of the compare operand.
10742  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10743  SDValue NewCmpOp =
10744  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10745  DAG.getConstant(MaskVal, dl, MVT::i32));
10746 
10747  // Replace the existing compare operand with the properly zero-extended one.
10749  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10750  Ops.push_back(AtomicNode->getOperand(i));
10751  Ops[2] = NewCmpOp;
10752  MachineMemOperand *MMO = AtomicNode->getMemOperand();
10753  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10754  auto NodeTy =
10756  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10757 }
10758 
SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  SDLoc dl(N);
  unsigned Opc = N->getOpcode();
  switch (Opc) {
  case ISD::ATOMIC_LOAD: {
    // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
        N->getOperand(0),
        DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
    for (int I = 1, E = N->getNumOperands(); I < E; ++I)
      Ops.push_back(N->getOperand(I));
    SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
                                                Ops, MemVT, N->getMemOperand());
    // Reassemble the i128 from the two 64-bit results: lo | (hi << 64).
    SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
    SDValue ValHi =
        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
    ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
                        DAG.getConstant(64, dl, MVT::i32));
    SDValue Val =
        DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
    return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
                       {Val, LoadedVal.getValue(2)});
  }
  case ISD::ATOMIC_STORE: {
    // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
    // lowered to ppc instructions by pattern matching instruction selector.
    SDVTList Tys = DAG.getVTList(MVT::Other);
        N->getOperand(0),
        DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
    // Split the stored i128 into low and high 64-bit halves for the
    // intrinsic's operands.
    SDValue Val = N->getOperand(2);
    SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
    SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
                                DAG.getConstant(64, dl, MVT::i32));
    ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
    Ops.push_back(ValLo);
    Ops.push_back(ValHi);
    Ops.push_back(N->getOperand(1));
    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
                                   N->getMemOperand());
  }
  default:
    llvm_unreachable("Unexpected atomic opcode");
  }
}
10811 
// Lower SCALAR_TO_VECTOR by spilling the scalar to an aligned stack slot and
// reloading it with the vector type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}
10827 
10828 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10829  SelectionDAG &DAG) const {
10830  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10831  "Should only be called for ISD::INSERT_VECTOR_ELT");
10832 
10833  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10834 
10835  EVT VT = Op.getValueType();
10836  SDLoc dl(Op);
10837  SDValue V1 = Op.getOperand(0);
10838  SDValue V2 = Op.getOperand(1);
10839 
10840  if (VT == MVT::v2f64 && C)
10841  return Op;
10842 
10843  if (Subtarget.hasP9Vector()) {
10844  // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
10845  // because on P10, it allows this specific insert_vector_elt load pattern to
10846  // utilize the refactored load and store infrastructure in order to exploit
10847  // prefixed loads.
10848  // On targets with inexpensive direct moves (Power9 and up), a
10849  // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
10850  // load since a single precision load will involve conversion to double
10851  // precision on the load followed by another conversion to single precision.
10852  if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
10853  (isa<LoadSDNode>(V2))) {
10854  SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
10855  SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
10856  SDValue InsVecElt =
10857  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
10858  BitcastLoad, Op.getOperand(2));
10859  return DAG.getBitcast(MVT::v4f32, InsVecElt);
10860  }
10861  }
10862 
10863  if (Subtarget.isISA3_1()) {
10864  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
10865  return SDValue();
10866  // On P10, we have legal lowering for constant and variable indices for
10867  // all vectors.
10868  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10869  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
10870  return Op;
10871  }
10872 
10873  // Before P10, we have legal lowering for constant indices but not for
10874  // variable ones.
10875  if (!C)
10876  return SDValue();
10877 
10878  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10879  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10880  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10881  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10882  unsigned InsertAtElement = C->getZExtValue();
10883  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10884  if (Subtarget.isLittleEndian()) {
10885  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10886  }
10887  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10888  DAG.getConstant(InsertAtByte, dl, MVT::i32));
10889  }
10890  return Op;
10891 }
10892 
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  // Only pair (v256i1) and accumulator (v512i1) loads are custom lowered.
  if (VT != MVT::v256i1 && VT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
  // 2 or 4 vsx registers.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  Align Alignment = LN->getAlign();
  SmallVector<SDValue, 4> LoadChains;
  unsigned NumVecs = VT.getSizeInBits() / 128;
  // Emit one 16-byte load per underlying VSX register, bumping the pointer
  // by 16 each time.
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    SDValue Load =
        DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
                    LN->getPointerInfo().getWithOffset(Idx * 16),
                    commonAlignment(Alignment, Idx * 16),
                    LN->getMemOperand()->getFlags(), LN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Loads.push_back(Load);
    LoadChains.push_back(Load.getValue(1));
  }
  // Reverse the order of the loaded subvectors (and chains) on little-endian
  // targets before rebuilding the wide value.
  if (Subtarget.isLittleEndian()) {
    std::reverse(Loads.begin(), Loads.end());
    std::reverse(LoadChains.begin(), LoadChains.end());
  }
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
  SDValue Value =
                  dl, VT, Loads);
  SDValue RetOps[] = {Value, TF};
  return DAG.getMergeValues(RetOps, dl);
}
10937 
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();
  EVT StoreVT = Value.getValueType();

  // Only pair (v256i1) and accumulator (v512i1) stores are custom lowered.
  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
  // underlying registers individually.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");
  Align Alignment = SN->getAlign();
  SmallVector<SDValue, 4> Stores;
  unsigned NumVecs = 2;
  if (StoreVT == MVT::v512i1) {
    NumVecs = 4;
  }
  // Store each underlying 16-byte register; on little endian the register
  // index runs in the opposite direction of the memory offset.
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
                              DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
    SDValue Store =
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
  }
  // Join all the store chains into a single token factor for the caller.
  SDValue TF = DAG.getTokenFactor(dl, Stores);
  return TF;
}
10980 
10981 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10982  SDLoc dl(Op);
10983  if (Op.getValueType() == MVT::v4i32) {
10984  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10985 
10986  SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10987  // +16 as shift amt.
10988  SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10989  SDValue RHSSwap = // = vrlw RHS, 16
10990  BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10991 
10992  // Shrinkify inputs to v8i16.
10993  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10994  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10995  RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10996 
10997  // Low parts multiplied together, generating 32-bit results (we ignore the
10998  // top parts).
10999  SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11000  LHS, RHS, DAG, dl, MVT::v4i32);
11001 
11002  SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11003  LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11004  // Shift the high parts up 16 bits.
11005  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11006  Neg16, DAG, dl);
11007  return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11008  } else if (Op.getValueType() == MVT::v16i8) {
11009  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11010  bool isLittleEndian = Subtarget.isLittleEndian();
11011 
11012  // Multiply the even 8-bit parts, producing 16-bit sums.
11013  SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11014  LHS, RHS, DAG, dl, MVT::v8i16);
11015  EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11016 
11017  // Multiply the odd 8-bit parts, producing 16-bit sums.
11018  SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11019  LHS, RHS, DAG, dl, MVT::v8i16);
11020  OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11021 
11022  // Merge the results together. Because vmuleub and vmuloub are
11023  // instructions with a big-endian bias, we must reverse the
11024  // element numbering and reverse the meaning of "odd" and "even"
11025  // when generating little endian code.
11026  int Ops[16];
11027  for (unsigned i = 0; i != 8; ++i) {
11028  if (isLittleEndian) {
11029  Ops[i*2 ] = 2*i;
11030  Ops[i*2+1] = 2*i+16;
11031  } else {
11032  Ops[i*2 ] = 2*i+1;
11033  Ops[i*2+1] = 2*i+1+16;
11034  }
11035  }
11036  if (isLittleEndian)
11037  return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11038  else
11039  return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11040  } else {
11041  llvm_unreachable("Unknown mul to lower!");
11042  }
11043 }
11044 
11045 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11046  bool IsStrict = Op->isStrictFPOpcode();
11047  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11048  !Subtarget.hasP9Vector())
11049  return SDValue();
11050 
11051  return Op;
11052 }
11053 
11054 // Custom lowering for fpext vf32 to v2f64
11055 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11056 
11057  assert(Op.getOpcode() == ISD::FP_EXTEND &&
11058  "Should only be called for ISD::FP_EXTEND");
11059 
11060  // FIXME: handle extends from half precision float vectors on P9.
11061  // We only want to custom lower an extend from v2f32 to v2f64.
11062  if (Op.getValueType() != MVT::v2f64 ||
11063  Op.getOperand(0).getValueType() != MVT::v2f32)
11064  return SDValue();
11065 
11066  SDLoc dl(Op);
11067  SDValue Op0 = Op.getOperand(0);
11068 
11069  switch (Op0.getOpcode()) {
11070  default:
11071  return SDValue();
11072  case ISD::EXTRACT_SUBVECTOR: {
11073  assert(Op0.getNumOperands() == 2 &&
11074  isa<ConstantSDNode>(Op0->getOperand(1)) &&
11075  "Node should have 2 operands with second one being a constant!");
11076 
11077  if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11078  return SDValue();
11079 
11080  // Custom lower is only done for high or low doubleword.
11081  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11082  if (Idx % 2 != 0)
11083  return SDValue();
11084 
11085  // Since input is v4f32, at this point Idx is either 0 or 2.
11086  // Shift to get the doubleword position we want.
11087  int DWord = Idx >> 1;
11088 
11089  // High and low word positions are different on little endian.
11090  if (Subtarget.isLittleEndian())
11091  DWord ^= 0x1;
11092 
11093  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11094  Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11095  }
11096  case ISD::FADD:
11097  case ISD::FMUL:
11098  case ISD::FSUB: {
11099  SDValue NewLoad[2];
11100  for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11101  // Ensure both input are loads.
11102  SDValue LdOp = Op0.getOperand(i);
11103  if (LdOp.getOpcode() != ISD::LOAD)
11104  return SDValue();
11105  // Generate new load node.
11106  LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11107  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11108  NewLoad[i] = DAG.getMemIntrinsicNode(
11109  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11110  LD->getMemoryVT(), LD->getMemOperand());
11111  }
11112  SDValue NewOp =
11113  DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11114  NewLoad[1], Op0.getNode()->getFlags());
11115  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11116  DAG.getConstant(0, dl, MVT::i32));
11117  }
11118  case ISD::LOAD: {
11119  LoadSDNode *LD = cast<LoadSDNode>(Op0);
11120  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11121  SDValue NewLd = DAG.getMemIntrinsicNode(
11122  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11123  LD->getMemoryVT(), LD->getMemOperand());
11124  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11125  DAG.getConstant(0, dl, MVT::i32));
11126  }
11127  }
11128  llvm_unreachable("ERROR:Should return for all cases within swtich.");
11129 }
11130 
/// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches on the node's opcode to the dedicated Lower* helper; opcodes
/// that reach the default case were never marked Custom and indicate a bug.
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::FPOW:               return lowerPow(Op, DAG);
  case ISD::FSIN:               return lowerSin(Op, DAG);
  case ISD::FCOS:               return lowerCos(Op, DAG);
  case ISD::FLOG:               return lowerLog(Op, DAG);
  case ISD::FLOG10:             return lowerLog10(Op, DAG);
  case ISD::FEXP:               return lowerExp(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  // Stack management lowering.
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Funnel shifts share one lowering routine.
  case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
  case ISD::FSHR:               return LowerFunnelShift(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::ROTL:               return LowerROTL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  }
}
11224 
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::ATOMIC_LOAD: {
    // Quadword atomic loads return {value, chain}.
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::READCYCLECOUNTER: {
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    // Combine the two 32-bit time-base halves into a single i64 result.
    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-emit the intrinsic with the legal setcc result type, then truncate
    // back to the original i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
    case Intrinsic::ppc_pack_longdouble:
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                    N->getOperand(2), N->getOperand(1)));
      break;
    case Intrinsic::ppc_maxfe:
    case Intrinsic::ppc_minfe:
    case Intrinsic::ppc_fnmsub:
    case Intrinsic::ppc_convert_f128_to_ppcf128:
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
      break;
    }
    break;
  }
  case ISD::VAARG: {
    // Only the 32-bit SVR4 ABI needs i64 va_arg split into two results.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
    return;
  }
  case ISD::TRUNCATE: {
    // Only vector truncates are legalized here.
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::FSHL:
  case ISD::FSHR:
    // Don't handle funnel shifts here.
    return;
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
11329 
11330 //===----------------------------------------------------------------------===//
11331 // Other Lowering Code
11332 //===----------------------------------------------------------------------===//
11333 
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Emit a zero-argument call to the requested PPC intrinsic at the current
  // insertion point.
  return Builder.CreateCall(Func, {});
}
11339 
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
                                 Instruction *Inst,
                                 AtomicOrdering Ord) const {
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  // Release (and stronger) orderings get a lightweight sync before the access.
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // Weaker orderings need no leading fence.
  return nullptr;
}
11351 
                                  Instruction *Inst,
                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    // On 64-bit targets, acquiring loads use the cheaper ctrl+isync-style
    // llvm.ppc.cfence on the loaded value instead of a full lwsync.
    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
      return Builder.CreateCall(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  // Non-acquire orderings (or fences with no atomic load) need nothing here.
  return nullptr;
}
11370 
// Expands a pseudo atomic read-modify-write into a larx/stcx. retry loop
// of machine basic blocks. BinOpcode==0 means ATOMIC_SWAP (store the
// increment directly); a nonzero CmpOpcode/CmpPred adds the min/max form
// with an early exit branch.
// NOTE(review): this extraction is missing original lines 11371-11372
// (the `EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, ...)`
// signature start) and 11425 (presumably
// `exitMBB->transferSuccessorsAndUpdatePHIs(BB);`); confirm against the
// upstream LLVM 15.0.1 sources.
11373  unsigned AtomicSize,
11374  unsigned BinOpcode,
11375  unsigned CmpOpcode,
11376  unsigned CmpPred) const {
11377  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11378  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11379 
// Pick the load-and-reserve / store-conditional pair for the access size.
// Sizes 1 and 2 require hardware part-word atomics (lbarx/lharx).
11380  auto LoadMnemonic = PPC::LDARX;
11381  auto StoreMnemonic = PPC::STDCX;
11382  switch (AtomicSize) {
11383  default:
11384  llvm_unreachable("Unexpected size of atomic entity");
11385  case 1:
11386  LoadMnemonic = PPC::LBARX;
11387  StoreMnemonic = PPC::STBCX;
11388  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11389  break;
11390  case 2:
11391  LoadMnemonic = PPC::LHARX;
11392  StoreMnemonic = PPC::STHCX;
11393  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11394  break;
11395  case 4:
11396  LoadMnemonic = PPC::LWARX;
11397  StoreMnemonic = PPC::STWCX;
11398  break;
11399  case 8:
11400  LoadMnemonic = PPC::LDARX;
11401  StoreMnemonic = PPC::STDCX;
11402  break;
11403  }
11404 
11405  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11406  MachineFunction *F = BB->getParent();
11407  MachineFunction::iterator It = ++BB->getIterator();
11408 
// Pseudo operands: (0) result, (1)/(2) the two address operands of the
// X-form memory access, (3) the increment/comparand value.
11409  Register dest = MI.getOperand(0).getReg();
11410  Register ptrA = MI.getOperand(1).getReg();
11411  Register ptrB = MI.getOperand(2).getReg();
11412  Register incr = MI.getOperand(3).getReg();
11413  DebugLoc dl = MI.getDebugLoc();
11414 
// loop2MBB only exists for the compare (min/max) form.
11415  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11416  MachineBasicBlock *loop2MBB =
11417  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11418  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11419  F->insert(It, loopMBB);
11420  if (CmpOpcode)
11421  F->insert(It, loop2MBB);
11422  F->insert(It, exitMBB);
// Everything after the pseudo in BB moves into exitMBB.
11423  exitMBB->splice(exitMBB->begin(), BB,
11424  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11426 
// For a swap (no BinOpcode) the stored value is the increment itself; no
// scratch register is needed.
11427  MachineRegisterInfo &RegInfo = F->getRegInfo();
11428  Register TmpReg = (!BinOpcode) ? incr :
11429  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11430  : &PPC::GPRCRegClass);
11431 
11432  // thisMBB:
11433  // ...
11434  // fallthrough --> loopMBB
11435  BB->addSuccessor(loopMBB);
11436 
11437  // loopMBB:
11438  // l[wd]arx dest, ptr
11439  // add r0, dest, incr
11440  // st[wd]cx. r0, ptr
11441  // bne- loopMBB
11442  // fallthrough --> exitMBB
11443 
11444  // For max/min...
11445  // loopMBB:
11446  // l[wd]arx dest, ptr
11447  // cmpl?[wd] incr, dest
11448  // bgt exitMBB
11449  // loop2MBB:
11450  // st[wd]cx. dest, ptr
11451  // bne- loopMBB
11452  // fallthrough --> exitMBB
11453 
11454  BB = loopMBB;
11455  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11456  .addReg(ptrA).addReg(ptrB);
11457  if (BinOpcode)
11458  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11459  if (CmpOpcode) {
11460  // Signed comparisons of byte or halfword values must be sign-extended.
11461  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11462  Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11463  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11464  ExtReg).addReg(dest);
11465  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11466  .addReg(incr).addReg(ExtReg);
11467  } else
11468  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11469  .addReg(incr).addReg(dest);
11470 
// If the comparison says "keep the old value", exit without storing.
11471  BuildMI(BB, dl, TII->get(PPC::BCC))
11472  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11473  BB->addSuccessor(loop2MBB);
11474  BB->addSuccessor(exitMBB);
11475  BB = loop2MBB;
11476  }
// Store-conditional; retry the whole loop if the reservation was lost.
11477  BuildMI(BB, dl, TII->get(StoreMnemonic))
11478  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11479  BuildMI(BB, dl, TII->get(PPC::BCC))
11480  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11481  BB->addSuccessor(loopMBB);
11482  BB->addSuccessor(exitMBB);
11483 
11484  // exitMBB:
11485  // ...
11486  BB = exitMBB;
11487  return BB;
11488 }
11489 
// Returns true when MI's result is known to be sign-extended from a
// narrower width: sign-extending loads (LHA*/LWA*, prefixed PLHA*/PLWA*),
// explicit extensions (EXTSB/EXTSH/EXTSW and variants), and arithmetic
// right shifts (SRAW*). A COPY defers to TII->isSignExtended on the
// copied instruction; everything else is conservatively "no".
// NOTE(review): this extraction is missing original line 11490 (the
// `isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)` signature);
// confirm against the upstream LLVM 15.0.1 sources.
11491  switch(MI.getOpcode()) {
11492  default:
11493  return false;
11494  case PPC::COPY:
11495  return TII->isSignExtended(MI);
11496  case PPC::LHA:
11497  case PPC::LHA8:
11498  case PPC::LHAU:
11499  case PPC::LHAU8:
11500  case PPC::LHAUX:
11501  case PPC::LHAUX8:
11502  case PPC::LHAX:
11503  case PPC::LHAX8:
11504  case PPC::LWA:
11505  case PPC::LWAUX:
11506  case PPC::LWAX:
11507  case PPC::LWAX_32:
11508  case PPC::LWA_32:
11509  case PPC::PLHA:
11510  case PPC::PLHA8:
11511  case PPC::PLHA8pc:
11512  case PPC::PLHApc:
11513  case PPC::PLWA:
11514  case PPC::PLWA8:
11515  case PPC::PLWA8pc:
11516  case PPC::PLWApc:
11517  case PPC::EXTSB:
11518  case PPC::EXTSB8:
11519  case PPC::EXTSB8_32_64:
11520  case PPC::EXTSB8_rec:
11521  case PPC::EXTSB_rec:
11522  case PPC::EXTSH:
11523  case PPC::EXTSH8:
11524  case PPC::EXTSH8_32_64:
11525  case PPC::EXTSH8_rec:
11526  case PPC::EXTSH_rec:
11527  case PPC::EXTSW:
11528  case PPC::EXTSWSLI:
11529  case PPC::EXTSWSLI_32_64:
11530  case PPC::EXTSWSLI_32_64_rec:
11531  case PPC::EXTSWSLI_rec:
11532  case PPC::EXTSW_32:
11533  case PPC::EXTSW_32_64:
11534  case PPC::EXTSW_32_64_rec:
11535  case PPC::EXTSW_rec:
11536  case PPC::SRAW:
11537  case PPC::SRAWI:
11538  case PPC::SRAWI_rec:
11539  case PPC::SRAW_rec:
11540  return true;
11541  }
// Unreachable: every switch path above already returned.
11542  return false;
11543 }
11544 
// Expands a byte/halfword atomic read-modify-write pseudo. If the target
// has native part-word atomics, this delegates to EmitAtomicBinary;
// otherwise it emulates the operation with a word-sized lwarx/stwcx.
// loop that masks and shifts the sub-word lane inside the aligned word.
// NOTE(review): this extraction is missing original lines 11545-11546
// (the `EmitPartwordAtomicBinary` signature start), 11597 (presumably
// `exitMBB->transferSuccessorsAndUpdatePHIs(BB);`), and 11737 (the
// predicate immediate on the retry branch, presumably
// `.addImm(PPC::PRED_NE)`); confirm against upstream LLVM 15.0.1.
11547  bool is8bit, // operation
11548  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11549  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11550  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11551 
11552  // If this is a signed comparison and the value being compared is not known
11553  // to be sign extended, sign extend it here.
11554  DebugLoc dl = MI.getDebugLoc();
11555  MachineFunction *F = BB->getParent();
11556  MachineRegisterInfo &RegInfo = F->getRegInfo();
11557  Register incr = MI.getOperand(3).getReg();
11558  bool IsSignExtended = Register::isVirtualRegister(incr) &&
11559  isSignExtended(*RegInfo.getVRegDef(incr), TII);
11560 
11561  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11562  Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11563  BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11564  .addReg(MI.getOperand(3).getReg());
// Rewrite the pseudo to consume the sign-extended copy.
11565  MI.getOperand(3).setReg(ValueReg);
11566  }
11567  // If we support part-word atomic mnemonics, just use them
11568  if (Subtarget.hasPartwordAtomics())
11569  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11570  CmpPred);
11571 
11572  // In 64 bit mode we have to use 64 bits for addresses, even though the
11573  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11574  // registers without caring whether they're 32 or 64, but here we're
11575  // doing actual arithmetic on the addresses.
11576  bool is64bit = Subtarget.isPPC64();
11577  bool isLittleEndian = Subtarget.isLittleEndian();
11578  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11579 
11580  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11581  MachineFunction::iterator It = ++BB->getIterator();
11582 
11583  Register dest = MI.getOperand(0).getReg();
11584  Register ptrA = MI.getOperand(1).getReg();
11585  Register ptrB = MI.getOperand(2).getReg();
11586 
// loop2MBB only exists for the compare (min/max) form.
11587  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11588  MachineBasicBlock *loop2MBB =
11589  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11590  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11591  F->insert(It, loopMBB);
11592  if (CmpOpcode)
11593  F->insert(It, loop2MBB);
11594  F->insert(It, exitMBB);
// Everything after the pseudo in BB moves into exitMBB.
11595  exitMBB->splice(exitMBB->begin(), BB,
11596  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11598 
11599  const TargetRegisterClass *RC =
11600  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11601  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11602 
// Scratch registers for the address/mask/shift bookkeeping below. On
// little-endian the lane is already at bit 0, so ShiftReg aliases
// Shift1Reg and the XORI correction is skipped.
11603  Register PtrReg = RegInfo.createVirtualRegister(RC);
11604  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11605  Register ShiftReg =
11606  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11607  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11608  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11609  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11610  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11611  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11612  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11613  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11614  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11615  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
11616  Register Ptr1Reg;
11617  Register TmpReg =
11618  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11619 
11620  // thisMBB:
11621  // ...
11622  // fallthrough --> loopMBB
11623  BB->addSuccessor(loopMBB);
11624 
11625  // The 4-byte load must be aligned, while a char or short may be
11626  // anywhere in the word. Hence all this nasty bookkeeping code.
11627  // add ptr1, ptrA, ptrB [copy if ptrA==0]
11628  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11629  // xori shift, shift1, 24 [16]
11630  // rlwinm ptr, ptr1, 0, 0, 29
11631  // slw incr2, incr, shift
11632  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11633  // slw mask, mask2, shift
11634  // loopMBB:
11635  // lwarx tmpDest, ptr
11636  // add tmp, tmpDest, incr2
11637  // andc tmp2, tmpDest, mask
11638  // and tmp3, tmp, mask
11639  // or tmp4, tmp3, tmp2
11640  // stwcx. tmp4, ptr
11641  // bne- loopMBB
11642  // fallthrough --> exitMBB
11643  // srw SrwDest, tmpDest, shift
11644  // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
11645  if (ptrA != ZeroReg) {
11646  Ptr1Reg = RegInfo.createVirtualRegister(RC);
11647  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11648  .addReg(ptrA)
11649  .addReg(ptrB);
11650  } else {
11651  Ptr1Reg = ptrB;
11652  }
11653  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11654  // mode.
11655  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11656  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11657  .addImm(3)
11658  .addImm(27)
11659  .addImm(is8bit ? 28 : 27)
11660  if (!isLittleEndian)
11661  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11662  .addReg(Shift1Reg)
11663  .addImm(is8bit ? 24 : 16);
11664  if (is64bit)
11665  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11666  .addReg(Ptr1Reg)
11667  .addImm(0)
11668  .addImm(61);
11669  else
11670  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11671  .addReg(Ptr1Reg)
11672  .addImm(0)
11673  .addImm(0)
11674  .addImm(29);
11675  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11676  if (is8bit)
11677  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11678  else {
// 65535 does not fit in LI's signed 16-bit immediate, so build it with
// li 0 + ori.
11679  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11680  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11681  .addReg(Mask3Reg)
11682  .addImm(65535);
11683  }
11684  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11685  .addReg(Mask2Reg)
11686  .addReg(ShiftReg);
11687 
11688  BB = loopMBB;
11689  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11690  .addReg(ZeroReg)
11691  .addReg(PtrReg);
11692  if (BinOpcode)
11693  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11694  .addReg(Incr2Reg)
11695  .addReg(TmpDestReg);
11696  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11697  .addReg(TmpDestReg)
11698  .addReg(MaskReg);
11699  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11700  if (CmpOpcode) {
11701  // For unsigned comparisons, we can directly compare the shifted values.
11702  // For signed comparisons we shift and sign extend.
11703  Register SReg = RegInfo.createVirtualRegister(GPRC);
11704  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11705  .addReg(TmpDestReg)
11706  .addReg(MaskReg);
11707  unsigned ValueReg = SReg;
11708  unsigned CmpReg = Incr2Reg;
11709  if (CmpOpcode == PPC::CMPW) {
11710  ValueReg = RegInfo.createVirtualRegister(GPRC);
11711  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11712  .addReg(SReg)
11713  .addReg(ShiftReg);
11714  Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11715  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11716  .addReg(ValueReg);
11717  ValueReg = ValueSReg;
11718  CmpReg = incr;
11719  }
11720  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11721  .addReg(CmpReg)
11722  .addReg(ValueReg);
// If the comparison says "keep the old value", exit without storing.
11723  BuildMI(BB, dl, TII->get(PPC::BCC))
11724  .addImm(CmpPred)
11725  .addReg(PPC::CR0)
11726  .addMBB(exitMBB);
11727  BB->addSuccessor(loop2MBB);
11728  BB->addSuccessor(exitMBB);
11729  BB = loop2MBB;
11730  }
// Merge the updated lane back into the untouched bytes and retry the
// store-conditional until the reservation holds.
11731  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11732  BuildMI(BB, dl, TII->get(PPC::STWCX))
11733  .addReg(Tmp4Reg)
11734  .addReg(ZeroReg)
11735  .addReg(PtrReg);
11736  BuildMI(BB, dl, TII->get(PPC::BCC))
11738  .addReg(PPC::CR0)
11739  .addMBB(loopMBB);
11740  BB->addSuccessor(loopMBB);
11741  BB->addSuccessor(exitMBB);
11742 
11743  // exitMBB:
11744  // ...
11745  BB = exitMBB;
11746  // Since the shift amount is not a constant, we need to clear
11747  // the upper bits with a separate RLWINM.
// Both instructions are inserted at exitMBB->begin(), so the SRW built
// second ends up executing first: srw then rlwinm.
11748  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
11749  .addReg(SrwDestReg)
11750  .addImm(0)
11751  .addImm(is8bit ? 24 : 16)
11752  .addImm(31);
11753  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
11754  .addReg(TmpDestReg)
11755  .addReg(ShiftReg);
11756  return BB;
11757 }
11758 
// Expands the EH_SjLj_SetJmp32/64 pseudo: splits the block into
// thisMBB -> {mainMBB, sinkMBB}, stores the resume address, TOC pointer
// and base pointer into the (LLVM-private) jmp_buf, and produces 0 on the
// direct path and 1 on the longjmp-return path via a PHI in sinkMBB.
// NOTE(review): this extraction is missing several original lines —
// 11759-11760 (the `emitEHSjLjSetJmp` signature start), 11767
// (presumably `MachineRegisterInfo &MRI = MF->getRegInfo();`), 11770
// (presumably the declaration of iterator `I` used by the inserts below),
// 11798-11799 (presumably the CreateMachineBasicBlock calls defining
// `mainMBB`/`sinkMBB`), 11808 (presumably
// `sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);`), 11832 and 11857;
// confirm against the upstream LLVM 15.0.1 sources.
11761  MachineBasicBlock *MBB) const {
11762  DebugLoc DL = MI.getDebugLoc();
11763  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11764  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11765 
11766  MachineFunction *MF = MBB->getParent();
11768 
11769  const BasicBlock *BB = MBB->getBasicBlock();
11771 
11772  Register DstReg = MI.getOperand(0).getReg();
11773  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11774  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11775  Register mainDstReg = MRI.createVirtualRegister(RC);
11776  Register restoreDstReg = MRI.createVirtualRegister(RC);
11777 
11778  MVT PVT = getPointerTy(MF->getDataLayout());
11779  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11780  "Invalid Pointer Size!");
11781  // For v = setjmp(buf), we generate
11782  //
11783  // thisMBB:
11784  // SjLjSetup mainMBB
11785  // bl mainMBB
11786  // v_restore = 1
11787  // b sinkMBB
11788  //
11789  // mainMBB:
11790  // buf[LabelOffset] = LR
11791  // v_main = 0
11792  //
11793  // sinkMBB:
11794  // v = phi(main, restore)
11795  //
11796 
11797  MachineBasicBlock *thisMBB = MBB;
11800  MF->insert(I, mainMBB);
11801  MF->insert(I, sinkMBB);
11802 
11803  MachineInstrBuilder MIB;
11804 
11805  // Transfer the remainder of BB and its successor edges to sinkMBB.
11806  sinkMBB->splice(sinkMBB->begin(), MBB,
11807  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11809 
11810  // Note that the structure of the jmp_buf used here is not compatible
11811  // with that used by libc, and is not designed to be. Specifically, it
11812  // stores only those 'reserved' registers that LLVM does not otherwise
11813  // understand how to spill. Also, by convention, by the time this
11814  // intrinsic is called, Clang has already stored the frame address in the
11815  // first slot of the buffer and stack address in the third. Following the
11816  // X86 target code, we'll store the jump address in the second slot. We also
11817  // need to save the TOC pointer (R2) to handle jumps between shared
11818  // libraries, and that will be stored in the fourth slot. The thread
11819  // identifier (R13) is not affected.
11820 
11821  // thisMBB:
11822  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11823  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11824  const int64_t BPOffset = 4 * PVT.getStoreSize();
11825 
11826  // Prepare IP either in reg.
11827  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11828  Register LabelReg = MRI.createVirtualRegister(PtrRC);
11829  Register BufReg = MI.getOperand(1).getReg();
11830 
// On 64-bit ELF, save the TOC pointer (X2) into slot 3 of the buffer.
11831  if (Subtarget.is64BitELFABI()) {
11833  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11834  .addReg(PPC::X2)
11835  .addImm(TOCOffset)
11836  .addReg(BufReg)
11837  .cloneMemRefs(MI);
11838  }
11839 
11840  // Naked functions never have a base pointer, and so we use r1. For all
11841  // other functions, this decision must be delayed until during PEI.
11842  unsigned BaseReg;
11843  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11844  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11845  else
11846  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11847 
11848  MIB = BuildMI(*thisMBB, MI, DL,
11849  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11850  .addReg(BaseReg)
11851  .addImm(BPOffset)
11852  .addReg(BufReg)
11853  .cloneMemRefs(MI);
11854 
11855  // Setup
11856  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11858 
// Direct-call path returns 1 on the longjmp resume edge.
11859  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11860 
11861  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11862  .addMBB(mainMBB);
11863  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11864 
11865  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11866  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11867 
11868  // mainMBB:
11869  // mainDstReg = 0
// BCLalways left the return address in LR; capture it as the resume IP.
11870  MIB =
11871  BuildMI(mainMBB, DL,
11872  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11873 
11874  // Store IP
11875  if (Subtarget.isPPC64()) {
11876  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11877  .addReg(LabelReg)
11878  .addImm(LabelOffset)
11879  .addReg(BufReg);
11880  } else {
11881  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11882  .addReg(LabelReg)
11883  .addImm(LabelOffset)
11884  .addReg(BufReg);
11885  }
11886  MIB.cloneMemRefs(MI);
11887 
11888  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11889  mainMBB->addSuccessor(sinkMBB);
11890 
11891  // sinkMBB:
11892  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11893  TII->get(PPC::PHI), DstReg)
11894  .addReg(mainDstReg).addMBB(mainMBB)
11895  .addReg(restoreDstReg).addMBB(thisMBB);
11896 
11897  MI.eraseFromParent();
11898  return sinkMBB;
11899 }
11900 
// Expands the EH_SjLj_LongJmp32/64 pseudo: reloads FP, the saved IP, SP,
// BP (and on 64-bit SVR4 the TOC pointer) from the jmp_buf, then jumps to
// the saved IP via CTR.
// NOTE(review): this extraction is missing several original lines —
// 11901-11902 (the `emitEHSjLjLongJmp` signature start), 11908
// (presumably `MachineRegisterInfo &MRI = MF->getRegInfo();`), 11916
// (presumably `Register Tmp = MRI.createVirtualRegister(RC);`, the `Tmp`
// used for the IP reload below), and 11987; confirm against the upstream
// LLVM 15.0.1 sources.
11903  MachineBasicBlock *MBB) const {
11904  DebugLoc DL = MI.getDebugLoc();
11905  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11906 
11907  MachineFunction *MF = MBB->getParent();
11909 
11910  MVT PVT = getPointerTy(MF->getDataLayout());
11911  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11912  "Invalid Pointer Size!");
11913 
11914  const TargetRegisterClass *RC =
11915  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11917  // Since FP is only updated here but NOT referenced, it's treated as GPR.
11918  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11919  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11920  unsigned BP =
11921  (PVT == MVT::i64)
11922  ? PPC::X30
11923  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11924  : PPC::R30);
11925 
11926  MachineInstrBuilder MIB;
11927 
// jmp_buf slot layout (see emitEHSjLjSetJmp): 0=FP, 1=IP, 2=SP, 3=TOC,
// 4=BP, each one pointer-store wide.
11928  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11929  const int64_t SPOffset = 2 * PVT.getStoreSize();
11930  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11931  const int64_t BPOffset = 4 * PVT.getStoreSize();
11932 
11933  Register BufReg = MI.getOperand(0).getReg();
11934 
11935  // Reload FP (the jumped-to function may not have had a
11936  // frame pointer, and if so, then its r31 will be restored
11937  // as necessary).
11938  if (PVT == MVT::i64) {
11939  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11940  .addImm(0)
11941  .addReg(BufReg);
11942  } else {
11943  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11944  .addImm(0)
11945  .addReg(BufReg);
11946  }
11947  MIB.cloneMemRefs(MI);
11948 
11949  // Reload IP
11950  if (PVT == MVT::i64) {
11951  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11952  .addImm(LabelOffset)
11953  .addReg(BufReg);
11954  } else {
11955  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11956  .addImm(LabelOffset)
11957  .addReg(BufReg);
11958  }
11959  MIB.cloneMemRefs(MI);
11960 
11961  // Reload SP
11962  if (PVT == MVT::i64) {
11963  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11964  .addImm(SPOffset)
11965  .addReg(BufReg);
11966  } else {
11967  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11968  .addImm(SPOffset)
11969  .addReg(BufReg);
11970  }
11971  MIB.cloneMemRefs(MI);
11972 
11973  // Reload BP
11974  if (PVT == MVT::i64) {
11975  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11976  .addImm(BPOffset)
11977  .addReg(BufReg);
11978  } else {
11979  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11980  .addImm(BPOffset)
11981  .addReg(BufReg);
11982  }
11983  MIB.cloneMemRefs(MI);
11984 
11985  // Reload TOC
11986  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11988  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11989  .addImm(TOCOffset)
11990  .addReg(BufReg)
11991  .cloneMemRefs(MI);
11992  }
11993 
11994  // Jump
// Move the reloaded IP into CTR and branch through it.
11995  BuildMI(*MBB, MI, DL,
11996  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11997  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11998 
11999  MI.eraseFromParent();
12000  return MBB;
12001 }
12002 
// Returns true when the function opts into inline stack probing, i.e.
// carries the "probe-stack" attribute with value "inline-asm".
// NOTE(review): this extraction is missing original line 12003 (the
// `hasInlineStackProbe(MachineFunction &MF)` signature); confirm against
// the upstream LLVM 15.0.1 sources.
12004  // If the function specifically requests inline stack probes, emit them.
12005  if (MF.getFunction().hasFnAttribute("probe-stack"))
12006  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12007  "inline-asm";
12008  return false;
12009 }
12010 
// Returns the interval, in bytes, at which the stack must be probed: the
// function's "stack-probe-size" attribute (default 4096) rounded down to
// the stack alignment, but never less than one alignment unit.
// NOTE(review): this extraction is missing original lines 12011 (the
// `getStackProbeSize(const MachineFunction &MF)` signature) and 12014
// (presumably the first half of a power-of-two stack-alignment assert —
// line 12015 below is its trailing message); confirm against upstream.
12012  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12013  unsigned StackAlign = TFI->getStackAlignment();
12015  "Unexpected stack alignment");
12016  // The default stack probe size is 4096 if the function has no
12017  // stack-probe-size attribute.
12018  unsigned StackProbeSize = 4096;
12019  const Function &Fn = MF.getFunction();
12020  if (Fn.hasFnAttribute("stack-probe-size"))
12021  Fn.getFnAttribute("stack-probe-size")
12022  .getValueAsString()
12023  .getAsInteger(0, StackProbeSize);
12024  // Round down to the stack alignment.
// Mask-based rounding; relies on StackAlign being a power of two.
12025  StackProbeSize &= ~(StackAlign - 1);
// If the attribute value was smaller than the alignment, probe every
// alignment unit instead of never.
12026  return StackProbeSize ? StackProbeSize : StackAlign;
12027 }
12028 
12029 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12030 // into three phases. In the first phase, it uses pseudo instruction
12031 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12032 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12033 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12034 // MaxCallFrameSize so that it can calculate correct data area pointer.
// NOTE(review): this extraction is missing several original lines —
// 12035-12036 (the `emitProbedAlloca` signature start), 12044 (presumably
// `MachineRegisterInfo &MRI = MF->getRegInfo();`, the `MRI` used
// throughout), 12153 (the predicate immediate on the TestMBB branch,
// presumably `.addImm(PPC::PRED_EQ)`), and 12187 (presumably
// `TailMBB->transferSuccessorsAndUpdatePHIs(MBB);`); confirm against the
// upstream LLVM 15.0.1 sources.
12037  MachineBasicBlock *MBB) const {
12038  const bool isPPC64 = Subtarget.isPPC64();
12039  MachineFunction *MF = MBB->getParent();
12040  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12041  DebugLoc DL = MI.getDebugLoc();
12042  const unsigned ProbeSize = getStackProbeSize(*MF);
12043  const BasicBlock *ProbedBB = MBB->getBasicBlock();
12045  // The CFG of probing stack looks as
12046  // +-----+
12047  // | MBB |
12048  // +--+--+
12049  // |
12050  // +----v----+
12051  // +--->+ TestMBB +---+
12052  // | +----+----+ |
12053  // | | |
12054  // | +-----v----+ |
12055  // +---+ BlockMBB | |
12056  // +----------+ |
12057  // |
12058  // +---------+ |
12059  // | TailMBB +<--+
12060  // +---------+
12061  // In MBB, calculate previous frame pointer and final stack pointer.
12062  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12063  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12064  // TailMBB is spliced via \p MI.
12065  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12066  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12067  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12068 
12069  MachineFunction::iterator MBBIter = ++MBB->getIterator();
12070  MF->insert(MBBIter, TestMBB);
12071  MF->insert(MBBIter, BlockMBB);
12072  MF->insert(MBBIter, TailMBB);
12073 
12074  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12075  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12076 
12077  Register DstReg = MI.getOperand(0).getReg();
12078  Register NegSizeReg = MI.getOperand(1).getReg();
12079  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12080  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12081  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12082  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12083 
12084  // Since value of NegSizeReg might be realigned in prolog/epilog, insert a
12085  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
12086  // NegSize.
12087  unsigned ProbeOpc;
12088  if (!MRI.hasOneNonDBGUse(NegSizeReg))
12089  ProbeOpc =
12090  isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12091  else
12092  // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12093  // and NegSizeReg will be allocated in the same phyreg to avoid
12094  // redundant copy when NegSizeReg has only one use which is current MI and
12095  // will be replaced by PREPARE_PROBED_ALLOCA then.
12096  ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12097  : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12098  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12099  .addDef(ActualNegSizeReg)
12100  .addReg(NegSizeReg)
12101  .add(MI.getOperand(2))
12102  .add(MI.getOperand(3));
12103 
12104  // Calculate final stack pointer, which equals to SP + ActualNegSize.
12105  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12106  FinalStackPtr)
12107  .addReg(SPReg)
12108  .addReg(ActualNegSizeReg);
12109 
12110  // Materialize a scratch register for update.
12111  int64_t NegProbeSize = -(int64_t)ProbeSize;
12112  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12113  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12114  if (!isInt<16>(NegProbeSize)) {
// Too large for a single LI immediate: build it as LIS (high halfword)
// then ORI (low halfword).
12115  Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12116  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12117  .addImm(NegProbeSize >> 16);
12118  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12119  ScratchReg)
12120  .addReg(TempReg)
12121  .addImm(NegProbeSize & 0xFFFF);
12122  } else
12123  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12124  .addImm(NegProbeSize);
12125 
12126  {
12127  // Probing leading residual part.
// residual = ActualNegSize - (ActualNegSize / ProbeSize) * ProbeSize,
// computed as div, mul, subf; one probing store covers it.
12128  Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12129  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12130  .addReg(ActualNegSizeReg)
12131  .addReg(ScratchReg);
12132  Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12133  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12134  .addReg(Div)
12135  .addReg(ScratchReg);
12136  Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12137  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12138  .addReg(Mul)
12139  .addReg(ActualNegSizeReg);
// stdux/stwux both probes the new page and updates SP in one instruction.
12140  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12141  .addReg(FramePointer)
12142  .addReg(SPReg)
12143  .addReg(NegMod);
12144  }
12145 
12146  {
12147  // Remaining part should be multiple of ProbeSize.
12148  Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12149  BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12150  .addReg(SPReg)
12151  .addReg(FinalStackPtr)
12152  BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12154  .addReg(CmpResult)
12155  .addMBB(TailMBB);
12156  TestMBB->addSuccessor(BlockMBB);
12157  TestMBB->addSuccessor(TailMBB);
12158  }
12159 
12160  {
12161  // Touch the block.
12162  // |P...|P...|P...
12163  BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12164  .addReg(FramePointer)
12165  .addReg(SPReg)
12166  .addReg(ScratchReg);
12167  BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12168  BlockMBB->addSuccessor(TestMBB);
12169  }
12170 
12171  // Calculation of MaxCallFrameSize is deferred to prolog/epilog, use
12172  // DYNAREAOFFSET pseudo instruction to get the future result.
12173  Register MaxCallFrameSizeReg =
12174  MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12175  BuildMI(TailMBB, DL,
12176  TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12177  MaxCallFrameSizeReg)
12178  .add(MI.getOperand(2))
12179  .add(MI.getOperand(3));
12180  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12181  .addReg(SPReg)
12182  .addReg(MaxCallFrameSizeReg);
12183 
12184  // Splice instructions after MI to TailMBB.
12185  TailMBB->splice(TailMBB->end(), MBB,
12186  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12188  MBB->addSuccessor(TestMBB);
12189 
12190  // Delete the pseudo instruction.
12191  MI.eraseFromParent();
12192 
12193  ++NumDynamicAllocaProbed;
12194  return TailMBB;
12195 }
12196 
12199  MachineBasicBlock *BB) const {
12200  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12201  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12202  if (Subtarget.is64BitELFABI() &&
12203  MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12204  !Subtarget.isUsingPCRelativeCalls()) {
12205  // Call lowering should have added an r2 operand to indicate a dependence
12206  // on the TOC base pointer value. It can't however, because there is no
12207  // way to mark the dependence as implicit there, and so the stackmap code
12208  // will confuse it with a regular operand. Instead, add the dependence
12209  // here.
12210  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12211  }
12212 
12213  return emitPatchPoint(MI, BB);
12214  }
12215 
12216  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12217  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12218  return emitEHSjLjSetJmp(MI, BB);
12219  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12220  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12221  return emitEHSjLjLongJmp(MI, BB);
12222  }
12223 
12224  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12225 
12226  // To "insert" these instructions we actually have to insert their
12227  // control-flow patterns.
12228  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12229  MachineFunction::iterator It = ++BB->getIterator();
12230 
12231  MachineFunction *F = BB->getParent();
12232  MachineRegisterInfo &MRI = F->getRegInfo();
12233 
12234  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12235  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12236  MI.getOpcode() == PPC::SELECT_I8) {
12238  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12239  MI.getOpcode() == PPC::SELECT_CC_I8)
12240  Cond.push_back(MI.getOperand(4));
12241  else
12243  Cond.push_back(MI.getOperand(1));
12244 
12245  DebugLoc dl = MI.getDebugLoc();
12246  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12247  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12248  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12249  MI.getOpcode() == PPC::SELECT_CC_F8 ||
12250  MI.getOpcode() == PPC::SELECT_CC_F16 ||
12251  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12252  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12253  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12254  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12255  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12256  MI.getOpcode() == PPC::SELECT_CC_SPE ||
12257  MI.getOpcode() == PPC::SELECT_F4 ||
12258  MI.getOpcode() == PPC::SELECT_F8 ||
12259  MI.getOpcode() == PPC::SELECT_F16 ||
12260  MI.getOpcode() == PPC::SELECT_SPE ||
12261  MI.getOpcode() == PPC::SELECT_SPE4 ||
12262  MI.getOpcode() == PPC::SELECT_VRRC ||
12263  MI.getOpcode() == PPC::SELECT_VSFRC ||
12264  MI.getOpcode() == PPC::SELECT_VSSRC ||
12265  MI.getOpcode() == PPC::SELECT_VSRC) {
12266  // The incoming instruction knows the destination vreg to set, the
12267  // condition code register to branch on, the true/false values to
12268  // select between, and a branch opcode to use.
12269 
12270  // thisMBB:
12271  // ...
12272  // TrueVal = ...
12273  // cmpTY ccX, r1, r2
12274  // bCC copy1MBB
12275  // fallthrough --> copy0MBB
12276  MachineBasicBlock *thisMBB = BB;
12277  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12278  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12279  DebugLoc dl = MI.getDebugLoc();
12280  F->insert(It, copy0MBB);
12281  F->insert(It, sinkMBB);
12282 
12283  // Transfer the remainder of BB and its successor edges to sinkMBB.
12284  sinkMBB->splice(sinkMBB->begin(), BB,
12285  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12287 
12288  // Next, add the true and fallthrough blocks as its successors.
12289  BB->addSuccessor(copy0MBB);
12290  BB->addSuccessor(sinkMBB);
12291 
12292  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12293  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12294  MI.getOpcode() == PPC::SELECT_F16 ||
12295  MI.getOpcode() == PPC::SELECT_SPE4 ||
12296  MI.getOpcode() == PPC::SELECT_SPE ||
12297  MI.getOpcode() == PPC::SELECT_VRRC ||
12298  MI.getOpcode() == PPC::SELECT_VSFRC ||
12299  MI.getOpcode() == PPC::SELECT_VSSRC ||
12300  MI.getOpcode() == PPC::SELECT_VSRC) {
12301  BuildMI(BB, dl, TII->get(PPC::BC))
12302  .addReg(MI.getOperand(1).getReg())
12303  .addMBB(sinkMBB);
12304  } else {
12305  unsigned SelectPred = MI.getOperand(4).getImm();
12306  BuildMI(BB, dl, TII->get(PPC::BCC))
12307  .addImm(SelectPred)
12308  .addReg(MI.getOperand(1).getReg())
12309  .addMBB(sinkMBB);
12310  }
12311 
12312  // copy0MBB:
12313  // %FalseValue = ...
12314  // # fallthrough to sinkMBB
12315  BB = copy0MBB;
12316 
12317  // Update machine-CFG edges
12318  BB->addSuccessor(sinkMBB);
12319 
12320  // sinkMBB:
12321  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12322  // ...
12323  BB = sinkMBB;
12324  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12325  .addReg(MI.getOperand(3).getReg())
12326  .addMBB(copy0MBB)
12327  .addReg(MI.getOperand(2).getReg())
12328  .addMBB(thisMBB);
12329  } else if (MI.getOpcode() == PPC::ReadTB) {
12330  // To read the 64-bit time-base register on a 32-bit target, we read the
12331  // two halves. Should the counter have wrapped while it was being read, we
12332  // need to try again.
12333  // ...
12334  // readLoop:
12335  // mfspr Rx,TBU # load from TBU
12336  // mfspr Ry,TB # load from TB
12337  // mfspr Rz,TBU # load from TBU
12338  // cmpw crX,Rx,Rz # check if 'old'='new'
12339  // bne readLoop # branch if they're not equal
12340  // ...
12341 
12342  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12343  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12344  DebugLoc dl = MI.getDebugLoc();
12345  F->insert(It, readMBB);
12346  F->insert(It, sinkMBB);
12347 
12348  // Transfer the remainder of BB and its successor edges to sinkMBB.
12349  sinkMBB->splice(sinkMBB->begin(), BB,
12350  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12352 
12353  BB->addSuccessor(readMBB);
12354  BB = readMBB;
12355 
12356  MachineRegisterInfo &RegInfo = F->getRegInfo();
12357  Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12358  Register LoReg = MI.getOperand(0).getReg();
12359  Register HiReg = MI.getOperand(1).getReg();
12360 
12361  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12362  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12363  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12364 
12365  Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12366 
12367  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12368  .addReg(HiReg)
12369  .addReg(ReadAgainReg);
12370  BuildMI(BB, dl, TII->get(PPC::BCC))
12372  .addReg(CmpReg)
12373  .addMBB(readMBB);
12374 
12375  BB->addSuccessor(readMBB);
12376  BB->addSuccessor(sinkMBB);
12377  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12378  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12379  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12380  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12381  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12382  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12383  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12384  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12385 
12386  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12388  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12390  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12391  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12392  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12393  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12394 
12395  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12397  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12398  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12399  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12400  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12401  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12402  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12403 
12404  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12406  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12408  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12409  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12410  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12411  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12412 
12413  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12414  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12415  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12416  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12417  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12418  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12419  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12420  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12421 
12422  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12423  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12424  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12425  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12426  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12427  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12428  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12429  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12430 
12431  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12432  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12433  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12434  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12435  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12436  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12437  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12438  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12439 
12440  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12441  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12442  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12443  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12444  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12445  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12446  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12447  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12448 
12449  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12450  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12451  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12452  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12453  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12454  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12455  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12456  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12457 
12458  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12459  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12460  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12461  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12462  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12463  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12464  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12465  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12466 
12467  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12468  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12469  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12470  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12471  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12472  BB = EmitAtomicBinary(MI, BB, 4, 0);
12473  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12474  BB = EmitAtomicBinary(MI, BB, 8, 0);
12475  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12476  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12477  (Subtarget.hasPartwordAtomics() &&
12478  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12479  (Subtarget.hasPartwordAtomics() &&
12480  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12481  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12482 
12483  auto LoadMnemonic = PPC::LDARX;
12484  auto StoreMnemonic = PPC::STDCX;
12485  switch (MI.getOpcode()) {
12486  default:
12487  llvm_unreachable("Compare and swap of unknown size");
12488  case PPC::ATOMIC_CMP_SWAP_I8:
12489  LoadMnemonic = PPC::LBARX;
12490  StoreMnemonic = PPC::STBCX;
12491  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12492  break;
12493  case PPC::ATOMIC_CMP_SWAP_I16:
12494  LoadMnemonic = PPC::LHARX;
12495  StoreMnemonic = PPC::STHCX;
12496  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12497  break;
12498  case PPC::ATOMIC_CMP_SWAP_I32:
12499  LoadMnemonic = PPC::LWARX;
12500  StoreMnemonic = PPC::STWCX;
12501  break;
12502  case PPC::ATOMIC_CMP_SWAP_I64:
12503  LoadMnemonic = PPC::LDARX;
12504  StoreMnemonic = PPC::STDCX;
12505  break;
12506  }
12507  Register dest = MI.getOperand(0).getReg();
12508  Register ptrA = MI.getOperand(1).getReg();
12509  Register ptrB = MI.getOperand(2).getReg();
12510  Register oldval = MI.getOperand(3).getReg();
12511  Register newval = MI.getOperand(4).getReg();
12512  DebugLoc dl = MI.getDebugLoc();
12513 
12514  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12515  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12516  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12517  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12518  F->insert(It, loop1MBB);
12519  F->insert(It, loop2MBB);
12520  F->insert(It, midMBB);
12521  F->insert(It, exitMBB);
12522  exitMBB->splice(exitMBB->begin(), BB,
12523  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12525 
12526  // thisMBB:
12527  // ...
12528  // fallthrough --> loopMBB
12529  BB->addSuccessor(loop1MBB);
12530 
12531  // loop1MBB:
12532  // l[bhwd]arx dest, ptr
12533  // cmp[wd] dest, oldval
12534  // bne- midMBB
12535  // loop2MBB:
12536  // st[bhwd]cx. newval, ptr
12537  // bne- loopMBB
12538  // b exitBB
12539  // midMBB:
12540  // st[bhwd]cx. dest, ptr
12541  // exitBB:
12542  BB = loop1MBB;
12543  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12544  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12545  .addReg(oldval)
12546  .addReg(dest);
12547  BuildMI(BB, dl, TII->get(PPC::BCC))
12549  .addReg(PPC::CR0)
12550  .addMBB(midMBB);
12551  BB->addSuccessor(loop2MBB);
12552  BB->addSuccessor(midMBB);
12553 
12554  BB = loop2MBB;
12555  BuildMI(BB, dl, TII->get(StoreMnemonic))
12556  .addReg(newval)
12557  .addReg(ptrA)
12558  .addReg(ptrB);
12559  BuildMI(BB, dl, TII->get(PPC::BCC))
12561  .addReg(PPC::CR0)
12562  .addMBB(loop1MBB);
12563  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12564  BB->addSuccessor(loop1MBB);
12565  BB->addSuccessor(exitMBB);
12566 
12567  BB = midMBB;
12568  BuildMI(BB, dl, TII->get(StoreMnemonic))
12569  .addReg(dest)
12570  .addReg(ptrA)
12571  .addReg(ptrB);
12572  BB->addSuccessor(exitMBB);
12573 
12574  // exitMBB:
12575  // ...
12576  BB = exitMBB;
12577  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12578  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12579  // We must use 64-bit registers for addresses when targeting 64-bit,
12580  // since we're actually doing arithmetic on them. Other registers
12581  // can be 32-bit.
12582  bool is64bit = Subtarget.isPPC64();
12583  bool isLittleEndian = Subtarget.isLittleEndian();
12584  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12585 
12586  Register dest = MI.getOperand(0).getReg();
12587  Register ptrA = MI.getOperand(1).getReg();
12588  Register ptrB = MI.getOperand(2).getReg();
12589  Register oldval = MI.getOperand(3).getReg();
12590  Register newval = MI.getOperand(4).getReg();
12591  DebugLoc dl = MI.getDebugLoc();
12592 
12593  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12594  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12595  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12596  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12597  F->insert(It, loop1MBB);
12598  F->insert(It, loop2MBB);
12599  F->insert(It, midMBB);
12600  F->insert(It, exitMBB);
12601  exitMBB->splice(exitMBB->begin(), BB,
12602  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12604 
12605  MachineRegisterInfo &RegInfo = F->getRegInfo();
12606  const TargetRegisterClass *RC =
12607  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12608  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12609 
12610  Register PtrReg = RegInfo.createVirtualRegister(RC);
12611  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12612  Register ShiftReg =
12613  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12614  Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12615  Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12616  Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12617  Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12618  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12619  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12620  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12621  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12622  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12623  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12624  Register Ptr1Reg;
12625  Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12626  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12627  // thisMBB:
12628  // ...
12629  // fallthrough --> loopMBB
12630  BB->addSuccessor(loop1MBB);
12631 
12632  // The 4-byte load must be aligned, while a char or short may be
12633  // anywhere in the word. Hence all this nasty bookkeeping code.
12634  // add ptr1, ptrA, ptrB [copy if ptrA==0]
12635  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12636  // xori shift, shift1, 24 [16]
12637  // rlwinm ptr, ptr1, 0, 0, 29
12638  // slw newval2, newval, shift
12639  // slw oldval2, oldval,shift
12640  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12641  // slw mask, mask2, shift
12642  // and newval3, newval2, mask
12643  // and oldval3, oldval2, mask
12644  // loop1MBB:
12645  // lwarx tmpDest, ptr
12646  // and tmp, tmpDest, mask
12647  // cmpw tmp, oldval3
12648  // bne- midMBB
12649  // loop2MBB:
12650  // andc tmp2, tmpDest, mask
12651  // or tmp4, tmp2, newval3
12652  // stwcx. tmp4, ptr
12653  // bne- loop1MBB
12654  // b exitBB
12655  // midMBB:
12656  // stwcx. tmpDest, ptr
12657  // exitBB:
12658  // srw dest, tmpDest, shift
12659  if (ptrA != ZeroReg) {
12660  Ptr1Reg = RegInfo.createVirtualRegister(RC);
12661  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12662  .addReg(ptrA)
12663  .addReg(ptrB);
12664  } else {
12665  Ptr1Reg = ptrB;
12666  }
12667 
12668  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12669  // mode.
12670  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12671  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12672  .addImm(3)
12673  .addImm(27)
12674  .addImm(is8bit ? 28 : 27);
12675  if (!isLittleEndian)
12676  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12677  .addReg(Shift1Reg)
12678  .addImm(is8bit ? 24 : 16);
12679  if (is64bit)
12680  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12681  .addReg(Ptr1Reg)
12682  .addImm(0)
12683  .addImm(61);
12684  else
12685  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12686  .addReg(Ptr1Reg)
12687  .addImm(0)
12688  .addImm(0)
12689  .addImm(29);
12690  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12691  .addReg(newval)
12692  .addReg(ShiftReg);
12693  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12694  .addReg(oldval)
12695  .addReg(ShiftReg);
12696  if (is8bit)
12697  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12698  else {
12699  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12700  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12701  .addReg(Mask3Reg)
12702  .addImm(65535);
12703  }
12704  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12705  .addReg(Mask2Reg)
12706  .addReg(ShiftReg);
12707  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12708  .addReg(NewVal2Reg)
12709  .addReg(MaskReg);
12710  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12711  .addReg(OldVal2Reg)
12712  .addReg(MaskReg);
12713 
12714  BB = loop1MBB;
12715  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12716  .addReg(ZeroReg)
12717  .addReg(PtrReg);
12718  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12719  .addReg(TmpDestReg)
12720  .addReg(MaskReg);
12721  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12722  .addReg(TmpReg)
12723  .addReg(OldVal3Reg);
12724  BuildMI(BB, dl, TII->get(PPC::BCC))
12726  .addReg(PPC::CR0)
12727  .addMBB(midMBB);
12728  BB->addSuccessor(loop2MBB);
12729  BB->addSuccessor(midMBB);
12730 
12731  BB = loop2MBB;
12732  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12733  .addReg(TmpDestReg)
12734  .addReg(MaskReg);
12735  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12736  .addReg(Tmp2Reg)
12737  .addReg(NewVal3Reg);
12738  BuildMI(BB, dl, TII->get(PPC::STWCX))
12739  .addReg(Tmp4Reg)
12740  .addReg(ZeroReg)
12741  .addReg(PtrReg);
12742  BuildMI(BB, dl, TII->get(PPC::BCC))
12744  .addReg(PPC::CR0)
12745  .addMBB(loop1MBB);
12746  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12747  BB->addSuccessor(loop1MBB);
12748  BB->addSuccessor(exitMBB);
12749 
12750  BB = midMBB;
12751  BuildMI(BB, dl, TII->get(PPC::STWCX))
12752  .addReg(TmpDestReg)
12753  .addReg(ZeroReg)
12754  .addReg(PtrReg);
12755  BB->addSuccessor(exitMBB);
12756 
12757  // exitMBB:
12758  // ...
12759  BB = exitMBB;
12760  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12761  .addReg(TmpReg)
12762  .addReg(ShiftReg);
12763  } else if (MI.getOpcode() == PPC::FADDrtz) {
12764  // This pseudo performs an FADD with rounding mode temporarily forced
12765  // to round-to-zero. We emit this via custom inserter since the FPSCR
12766  // is not modeled at the SelectionDAG level.
12767  Register Dest = MI.getOperand(0).getReg();
12768  Register Src1 = MI.getOperand(1).getReg();
12769  Register Src2 = MI.getOperand(2).getReg();
12770  DebugLoc dl = MI.getDebugLoc();
12771 
12772  MachineRegisterInfo &RegInfo = F->getRegInfo();
12773  Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12774 
12775  // Save FPSCR value.
12776  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12777 
12778  // Set rounding mode to round-to-zero.
12779  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12780  .addImm(31)
12782 
12783  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12784  .addImm(30)
12786 
12787  // Perform addition.
12788  auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12789  .addReg(Src1)
12790  .addReg(Src2);
12791  if (MI.getFlag(MachineInstr::NoFPExcept))
12793 
12794  // Restore FPSCR value.
12795  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12796  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12797  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12798  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12799  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12800  unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12801  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12802  ? PPC::ANDI8_rec
12803  : PPC::ANDI_rec;
12804  bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12805  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12806 
12807  MachineRegisterInfo &RegInfo = F->getRegInfo();
12808  Register Dest = RegInfo.createVirtualRegister(
12809  Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12810 
12811  DebugLoc Dl = MI.getDebugLoc();
12812  BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12813  .addReg(MI.getOperand(1).getReg())
12814  .addImm(1);
12815  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12816  MI.getOperand(0).getReg())
12817  .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12818  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12819  DebugLoc Dl = MI.getDebugLoc();
12820  MachineRegisterInfo &RegInfo = F->getRegInfo();
12821  Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12822  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12823  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12824  MI.getOperand(0).getReg())
12825  .addReg(CRReg);
12826  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12827  DebugLoc Dl = MI.getDebugLoc();
12828  unsigned Imm = MI.getOperand(1).getImm();
12829  BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12830  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12831  MI.getOperand(0).getReg())
12832  .addReg(PPC::CR0EQ);
12833  } else if (MI.getOpcode() == PPC::SETRNDi) {
12834  DebugLoc dl = MI.getDebugLoc();
12835  Register OldFPSCRReg = MI.getOperand(0).getReg();
12836 
12837  // Save FPSCR value.
12838  if (MRI.use_empty(OldFPSCRReg))
12839  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12840  else
12841  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12842 
12843  // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12844  // the following settings:
12845  // 00 Round to nearest
12846  // 01 Round to 0
12847  // 10 Round to +inf
12848  // 11 Round to -inf
12849 
12850  // When the operand is immediate, using the two least significant bits of
12851  // the immediate to set the bits 62:63 of FPSCR.
12852  unsigned Mode = MI.getOperand(1).getImm();
12853  BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12854  .addImm(31)
12856 
12857  BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12858  .addImm(30)
12860  } else if (MI.getOpcode() == PPC::SETRND) {
12861  DebugLoc dl = MI.getDebugLoc();
12862 
12863  // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12864  // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12865  // If the target doesn't have DirectMove, we should use stack to do the
12866  // conversion, because the target doesn't have the instructions like mtvsrd
12867  // or mfvsrd to do this conversion directly.
12868  auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12869  if (Subtarget.hasDirectMove()) {
12870  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12871  .addReg(SrcReg);
12872  } else {
12873  // Use stack to do the register copy.
12874  unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12875  MachineRegisterInfo &RegInfo = F->getRegInfo();
12876  const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12877  if (RC == &PPC::F8RCRegClass) {
12878  // Copy register from F8RCRegClass to G8RCRegclass.
12879  assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12880  "Unsupported RegClass.");
12881 
12882  StoreOp = PPC::STFD;
12883  LoadOp = PPC::LD;
12884  } else {
12885  // Copy register from G8RCRegClass to F8RCRegclass.
12886  assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12887  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12888  "Unsupported RegClass.");
12889  }
12890 
12891  MachineFrameInfo &MFI = F->getFrameInfo();
12892  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12893 
12894  MachineMemOperand *MMOStore = F->getMachineMemOperand(
12895  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12897  MFI.getObjectAlign(FrameIdx));
12898 
12899  // Store the SrcReg into the stack.
12900  BuildMI(*BB, MI, dl, TII->get(StoreOp))
12901  .addReg(SrcReg)
12902  .addImm(0)
12903  .addFrameIndex(FrameIdx)
12904  .addMemOperand(MMOStore);
12905 
12906  MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12907  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12909  MFI.getObjectAlign(FrameIdx));
12910 
12911  // Load from the stack where SrcReg is stored, and save to DestReg,
12912  // so we have done the RegClass conversion from RegClass::SrcReg to
12913  // RegClass::DestReg.
12914  BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12915  .addImm(0)
12916  .addFrameIndex(FrameIdx)
12917  .addMemOperand(MMOLoad);
12918  }
12919  };
12920 
12921  Register OldFPSCRReg = MI.getOperand(0).getReg();
12922 
12923  // Save FPSCR value.
12924  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12925 
12926  // When the operand is gprc register, use two least significant bits of the
12927  // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12928  //
12929  // copy OldFPSCRTmpReg, OldFPSCRReg
12930  // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12931  // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12932  // copy NewFPSCRReg, NewFPSCRTmpReg
12933  // mtfsf 255, NewFPSCRReg
12934  MachineOperand SrcOp = MI.getOperand(1);
12935  MachineRegisterInfo &RegInfo = F->getRegInfo();
12936  Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12937 
12938  copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12939 
12940  Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12941  Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12942 
12943  // The first operand of INSERT_SUBREG should be a register which has
12944  // subregisters, we only care about its RegClass, so we should use an
12945  // IMPLICIT_DEF register.
12946  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12947  BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12948  .addReg(ImDefReg)
12949  .add(SrcOp)
12950  .addImm(1);
12951 
12952  Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12953  BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12954  .addReg(OldFPSCRTmpReg)
12955  .addReg(ExtSrcReg)
12956  .addImm(0)
12957  .addImm(62);
12958 
12959  Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12960  copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12961 
12962  // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12963  // bits of FPSCR.
12964  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12965  .addImm(255)
12966  .addReg(NewFPSCRReg)
12967  .addImm(0)
12968  .addImm(0);
12969  } else if (MI.getOpcode() == PPC::SETFLM) {
12970  DebugLoc Dl = MI.getDebugLoc();
12971 
12972  // Result of setflm is previous FPSCR content, so we need to save it first.
12973  Register OldFPSCRReg = MI.getOperand(0).getReg();
12974  if (MRI.use_empty(OldFPSCRReg))
12975  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12976  else
12977  BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12978 
12979  // Put bits in 32:63 to FPSCR.
12980  Register NewFPSCRReg = MI.getOperand(1).getReg();
12981  BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12982  .addImm(255)
12983  .addReg(NewFPSCRReg)
12984  .addImm(0)
12985  .addImm(0);
12986  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12987  MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12988  return emitProbedAlloca(MI, BB);
12989  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12990  DebugLoc DL = MI.getDebugLoc();
12991  Register Src = MI.getOperand(2).getReg();
12992  Register Lo = MI.getOperand(0).getReg();
12993  Register Hi = MI.getOperand(1).getReg();
12994  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12995  .addDef(Lo)
12996  .addUse(Src, 0, PPC::sub_gp8_x1);
12997  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12998  .addDef(Hi)
12999  .addUse(Src, 0, PPC::sub_gp8_x0);
13000  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13001  MI.getOpcode() == PPC::STQX_PSEUDO) {
13002  DebugLoc DL = MI.getDebugLoc();
13003  // Ptr is used as the ptr_rc_no_r0 part
13004  // of LQ/STQ's memory operand and adding result of RA and RB,
13005  // so it has to be g8rc_and_g8rc_nox0.
13006  Register Ptr =
13007  F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13008  Register Val = MI.getOperand(0).getReg();
13009  Register RA = MI.getOperand(1).getReg();
13010  Register RB = MI.getOperand(2).getReg();
13011  BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13012  BuildMI(*BB, MI, DL,
13013  MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13014  : TII->get(PPC::STQ))
13015  .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13016  .addImm(0)
13017  .addReg(Ptr);
13018  } else {
13019  llvm_unreachable("Unexpected instr type to insert");
13020  }
13021 
13022  MI.eraseFromParent(); // The pseudo instruction is gone now.
13023  return BB;
13024 }
13025 
13026 //===----------------------------------------------------------------------===//
13027 // Target Optimization Hooks
13028 //===----------------------------------------------------------------------===//
13029 
13030 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13031  // For the estimates, convergence is quadratic, so we essentially double the
13032  // number of digits correct after every iteration. For both FRE and FRSQRTE,
13033  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13034  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13035  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13036  if (VT.getScalarType() == MVT::f64)
13037  RefinementSteps++;
13038  return RefinementSteps;
13039 }
13040 
13041 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13042  const DenormalMode &Mode) const {
13043  // We only have VSX Vector Test for software Square Root.
13044  EVT VT = Op.getValueType();
13045  if (!isTypeLegal(MVT::i1) ||
13046  (VT != MVT::f64 &&
13047  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13049 
13050  SDLoc DL(Op);
13051  // The output register of FTSQRT is CR field.
13053  // ftsqrt BF,FRB
13054  // Let e_b be the unbiased exponent of the double-precision
13055  // floating-point operand in register FRB.
13056  // fe_flag is set to 1 if either of the following conditions occurs.
13057  // - The double-precision floating-point operand in register FRB is a zero,
13058  // a NaN, or an infinity, or a negative value.
13059  // - e_b is less than or equal to -970.
13060  // Otherwise fe_flag is set to 0.
13061  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13062  // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13063  // exponent is less than -970)
13064  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13065  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13066  FTSQRT, SRIdxVal),
13067  0);
13068 }
13069 
13070 SDValue
13071 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13072  SelectionDAG &DAG) const {
13073  // We only have VSX Vector Square Root.
13074  EVT VT = Op.getValueType();
13075  if (VT != MVT::f64 &&
13076  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13078 
13079  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13080 }
13081 
13082 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13083  int Enabled, int &RefinementSteps,
13084  bool &UseOneConstNR,
13085  bool Reciprocal) const {
13086  EVT VT = Operand.getValueType();
13087  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13088  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13089  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13090  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13091  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13092  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13093 
13094  // The Newton-Raphson computation with a single constant does not provide
13095  // enough accuracy on some CPUs.
13096  UseOneConstNR = !Subtarget.needsTwoConstNR();
13097  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13098  }
13099  return SDValue();
13100 }
13101 
13102 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13103  int Enabled,
13104  int &RefinementSteps) const {
13105  EVT VT = Operand.getValueType();
13106  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13107  (VT == MVT::f64 && Subtarget.hasFRE()) ||
13108  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13109  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13110  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13111  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13112  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13113  }
13114  return SDValue();
13115 }
13116 
13117 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13118  // Note: This functionality is used only when unsafe-fp-math is enabled, and
13119  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13120  // enabled for division), this functionality is redundant with the default
13121  // combiner logic (once the division -> reciprocal/multiply transformation
13122  // has taken place). As a result, this matters more for older cores than for
13123  // newer ones.
13124 
13125  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13126  // reciprocal if there are two or more FDIVs (for embedded cores with only
13127  // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13128  switch (Subtarget.getCPUDirective()) {
13129  default:
13130  return 3;
13131  case PPC::DIR_440:
13132  case PPC::DIR_A2:
13133  case PPC::DIR_E500:
13134  case PPC::DIR_E500mc:
13135  case PPC::DIR_E5500:
13136  return 2;
13137  }
13138 }
13139 
13140 // isConsecutiveLSLoc needs to work even if all adds have not yet been
13141 // collapsed, and so we need to look through chains of them.
13143  int64_t& Offset, SelectionDAG &DAG) {
13144  if (DAG.isBaseWithConstantOffset(Loc)) {
13145  Base = Loc.getOperand(0);
13146  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13147 
13148  // The base might itself be a base plus an offset, and if so, accumulate
13149  // that as well.
13150  getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13151  }
13152 }
13153 
13155  unsigned Bytes, int Dist,
13156  SelectionDAG &DAG) {
13157  if (VT.getSizeInBits() / 8 != Bytes)
13158  return false;
13159 
13160  SDValue BaseLoc = Base->getBasePtr();
13161  if (Loc.getOpcode() == ISD::FrameIndex) {
13162  if (BaseLoc.getOpcode() != ISD::FrameIndex)
13163  return false;
13164  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13165  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13166  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13167  int FS = MFI.getObjectSize(FI);
13168  int BFS = MFI.getObjectSize(BFI);
13169  if (FS != BFS || FS != (int)Bytes) return false;
13170  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13171  }
13172 
13173  SDValue Base1 = Loc, Base2 = BaseLoc;
13174  int64_t Offset1 = 0, Offset2 = 0;
13175  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13176  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13177  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13178  return true;
13179 
13180  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13181  const GlobalValue *GV1 = nullptr;
13182  const GlobalValue *GV2 = nullptr;
13183  Offset1 = 0;
13184  Offset2 = 0;
13185  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13186  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13187  if (isGA1 && isGA2 && GV1 == GV2)
13188  return Offset1 == (Offset2 + Dist*Bytes);
13189  return false;
13190 }
13191 
13192 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13193 // not enforce equality of the chain operands.
13195  unsigned Bytes, int Dist,
13196  SelectionDAG &DAG) {
13197  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13198  EVT VT = LS->getMemoryVT();
13199  SDValue Loc = LS->getBasePtr();
13200  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13201  }
13202 
13203  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13204  EVT VT;
13205  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13206  default: return false;
13207  case Intrinsic::ppc_altivec_lvx:
13208  case Intrinsic::ppc_altivec_lvxl:
13209  case Intrinsic::ppc_vsx_lxvw4x:
13210  case Intrinsic::ppc_vsx_lxvw4x_be:
13211  VT = MVT::v4i32;
13212  break;
13213  case Intrinsic::ppc_vsx_lxvd2x:
13214  case Intrinsic::ppc_vsx_lxvd2x_be:
13215  VT = MVT::v2f64;
13216  break;
13217  case Intrinsic::ppc_altivec_lvebx:
13218  VT = MVT::i8;
13219  break;
13220  case Intrinsic::ppc_altivec_lvehx:
13221  VT = MVT::i16;
13222  break;
13223  case Intrinsic::ppc_altivec_lvewx:
13224  VT = MVT::i32;
13225  break;
13226  }
13227 
13228  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13229  }
13230 
13231  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13232  EVT VT;
13233  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13234  default: return false;
13235  case Intrinsic::ppc_altivec_stvx:
13236  case Intrinsic::ppc_altivec_stvxl:
13237  case Intrinsic::ppc_vsx_stxvw4x:
13238  VT = MVT::v4i32;
13239  break;
13240  case Intrinsic::ppc_vsx_stxvd2x:
13241  VT = MVT::v2f64;
13242  break;
13243  case Intrinsic::ppc_vsx_stxvw4x_be:
13244  VT = MVT::v4i32;
13245  break;
13246  case Intrinsic::ppc_vsx_stxvd2x_be:
13247  VT = MVT::v2f64;
13248  break;
13249  case Intrinsic::ppc_altivec_stvebx:
13250  VT = MVT::i8;
13251  break;
13252  case Intrinsic::ppc_altivec_stvehx:
13253  VT = MVT::i16;
13254  break;
13255  case Intrinsic::ppc_altivec_stvewx:
13256  VT = MVT::i32;
13257  break;
13258  }
13259 
13260  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13261  }
13262 
13263  return false;
13264 }
13265 
13266 // Return true is there is a nearyby consecutive load to the one provided
13267 // (regardless of alignment). We search up and down the chain, looking though
13268 // token factors and other loads (but nothing else). As a result, a true result
13269 // indicates that it is safe to create a new consecutive load adjacent to the
13270 // load provided.
13272  SDValue Chain = LD->getChain();
13273  EVT VT = LD->getMemoryVT();
13274 
13275  SmallSet<SDNode *, 16> LoadRoots;
13276  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13277  SmallSet<SDNode *, 16> Visited;
13278 
13279  // First, search up the chain, branching to follow all token-factor operands.
13280  // If we find a consecutive load, then we're done, otherwise, record all
13281  // nodes just above the top-level loads and token factors.
13282  while (!Queue.empty()) {
13283  SDNode *ChainNext = Queue.pop_back_val();
13284  if (!Visited.insert(ChainNext).second)
13285  continue;
13286 
13287  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13288  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13289  return true;
13290 
13291  if (!Visited.count(ChainLD->getChain().getNode()))
13292  Queue.push_back(ChainLD->getChain().getNode());
13293  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13294  for (const SDUse &O : ChainNext->ops())
13295  if (!Visited.count(O.getNode()))
13296  Queue.push_back(O.getNode());
13297  } else
13298  LoadRoots.insert(ChainNext);
13299  }
13300 
13301  // Second, search down the chain, starting from the top-level nodes recorded
13302  // in the first phase. These top-level nodes are the nodes just above all
13303  // loads and token factors. Starting with their uses, recursively look though
13304  // all loads (just the chain uses) and token factors to find a consecutive
13305  // load.
13306  Visited.clear();
13307  Queue.clear();
13308 
13309  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13310  IE = LoadRoots.end(); I != IE; ++I) {
13311  Queue.push_back(*I);
13312 
13313  while (!Queue.empty()) {
13314  SDNode *LoadRoot = Queue.pop_back_val();
13315  if (!Visited.insert(LoadRoot).second)
13316  continue;
13317 
13318  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13319  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13320  return true;
13321 
13322  for (SDNode *U : LoadRoot->uses())
13323  if (((isa<MemSDNode>(U) &&
13324  cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13325  U->getOpcode() == ISD::TokenFactor) &&
13326  !Visited.count(U))
13327  Queue.push_back(U);
13328  }
13329  }
13330 
13331  return false;
13332 }
13333 
13334 /// This function is called when we have proved that a SETCC node can be replaced
13335 /// by subtraction (and other supporting instructions) so that the result of
13336 /// comparison is kept in a GPR instead of CR. This function is purely for
13337 /// codegen purposes and has some flags to guide the codegen process.
13338 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13339  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13340  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13341 
13342  // Zero extend the operands to the largest legal integer. Originally, they
13343  // must be of a strictly smaller size.
13344  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13345  DAG.getConstant(Size, DL, MVT::i32));
13346  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13347  DAG.getConstant(Size, DL, MVT::i32));
13348 
13349  // Swap if needed. Depends on the condition code.
13350  if (Swap)
13351  std::swap(Op0, Op1);
13352 
13353  // Subtract extended integers.
13354  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13355 
13356  // Move the sign bit to the least significant position and zero out the rest.
13357  // Now the least significant bit carries the result of original comparison.
13358  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13359  DAG.getConstant(Size - 1, DL, MVT::i32));
13360  auto Final = Shifted;
13361 
13362  // Complement the result if needed. Based on the condition code.
13363  if (Complement)
13364  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13365  DAG.getConstant(1, DL, MVT::i64));
13366 
13367  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13368 }
13369 
13370 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13371  DAGCombinerInfo &DCI) const {
13372  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13373 
13374  SelectionDAG &DAG = DCI.DAG;
13375  SDLoc DL(N);
13376 
13377  // Size of integers being compared has a critical role in the following
13378  // analysis, so we prefer to do this when all types are legal.
13379  if (!DCI.isAfterLegalizeDAG())
13380  return SDValue();
13381 
13382  // If all users of SETCC extend its value to a legal integer type
13383  // then we replace SETCC with a subtraction
13384  for (const SDNode *U : N->uses())
13385  if (U->getOpcode() != ISD::ZERO_EXTEND)
13386  return SDValue();
13387 
13388  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13389  auto OpSize = N->getOperand(0).getValueSizeInBits();
13390 
13392 
13393  if (OpSize < Size) {
13394  switch (CC) {
13395  default: break;
13396  case ISD::SETULT:
13397  return generateEquivalentSub(N, Size, false, false, DL, DAG);
13398  case ISD::SETULE:
13399  return generateEquivalentSub(N, Size, true, true, DL, DAG);
13400  case ISD::SETUGT:
13401  return generateEquivalentSub(N, Size, false, true, DL, DAG);
13402  case ISD::SETUGE:
13403  return generateEquivalentSub(N, Size, true, false, DL, DAG);
13404  }
13405  }
13406 
13407  return SDValue();
13408 }
13409 
// Combine an effective i1 truncation (an actual TRUNCATE to i1, or a SETCC /
// SELECT_CC whose result is a single bit) whose inputs are all boolean-valued
// extensions, by promoting the whole cluster of bitwise operations to i1 so
// the values stay in CR bits instead of being moved through GPRs.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  // Only handle i32/i64 intermediate values; other widths are left alone.
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter to the result.
    // (For SETCC the condition code is operand 2; for SELECT_CC it is
    // operand 4.)
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compare: both operands must be fully sign-extended from i1.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: all bits above bit 0 must be known zero; if not,
      // a SETCC may still be profitably rewritten as a subtraction.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // SETCC/SELECT_CC compare two values, so operand 1 must satisfy the same
  // whitelist as operand 0.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: the extension-of-i1 (or constant) leaves of the cluster.
  // PromOps: the interior bitwise/select operations to be re-typed to i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist with the (at most two) direct operands of N.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    // A TRUNCATE node has only one meaningful operand.
    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Same containment check for the interior (to-be-promoted) operations.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the promoted operations in handles: the RAUW calls below may CSE or
  // delete nodes, and handles keep the remaining work-list entries valid.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first value operand: SELECT's condition and
    // SELECT_CC's two compare operands are skipped.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
13685 
13686 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13687  DAGCombinerInfo &DCI) const {
13688  SelectionDAG &DAG = DCI.DAG;
13689  SDLoc dl(N);
13690 
13691  // If we're tracking CR bits, we need to be careful that we don't have:
13692  // zext(binary-ops(trunc(x), trunc(y)))
13693  // or
13694  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13695  // such that we're unnecessarily moving things into CR bits that can more
13696  // efficiently stay in GPRs. Note that if we're not certain that the high
13697  // bits are set as required by the final extension, we still may need to do
13698  // some masking to get the proper behavior.
13699 
13700  // This same functionality is important on PPC64 when dealing with
13701  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13702  // the return values of functions. Because it is so similar, it is handled
13703  // here as well.
13704 
13705  if (N->getValueType(0) != MVT::i32 &&
13706  N->getValueType(0) != MVT::i64)
13707  return SDValue();
13708 
13709  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13710  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13711  return SDValue();
13712 
13713  if (N->getOperand(0).getOpcode() != ISD::AND &&
13714  N->getOperand(0).getOpcode() != ISD::OR &&
13715  N->getOperand(0).getOpcode() != ISD::XOR &&
13716  N->getOperand(0).getOpcode() != ISD::SELECT &&
13717  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13718  return SDValue();
13719 
13720  SmallVector<SDValue, 4> Inputs;
13721  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13722  SmallPtrSet<SDNode *, 16> Visited;
13723 
13724  // Visit all inputs, collect all binary operations (and, or, xor and
13725  // select) that are all fed by truncations.
13726  while (!BinOps.empty()) {
13727  SDValue BinOp = BinOps.pop_back_val();
13728 
13729  if (!Visited.insert(BinOp.getNode()).second)
13730  continue;
13731 
13732  PromOps.push_back(BinOp);
13733 
13734  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13735  // The condition of the select is not promoted.
13736  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13737  continue;
13738  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13739  continue;
13740 
13741  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13742  isa<ConstantSDNode>(BinOp.getOperand(i))) {
13743  Inputs.push_back(BinOp.getOperand(i));
13744  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13745  BinOp.getOperand(i).getOpcode() == ISD::OR ||
13746  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13747  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13748  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13749  BinOps.push_back(BinOp.getOperand(i));
13750  } else {
13751  // We have an input that is not a truncation or another binary
13752  // operation; we'll abort this transformation.
13753  return SDValue();
13754  }
13755  }
13756  }
13757 
13758  // The operands of a select that must be truncated when the select is
13759  // promoted because the operand is actually part of the to-be-promoted set.
13760  DenseMap<SDNode *, EVT> SelectTruncOp[2];
13761 
13762  // Make sure that this is a self-contained cluster of operations (which
13763  // is not quite the same thing as saying that everything has only one
13764  // use).
13765  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13766  if (isa<ConstantSDNode>(Inputs[i]))
13767  continue;
13768 
13769  for (SDNode *User : Inputs[i].getNode()->uses()) {
13770  if (User != N && !Visited.count(User))
13771  return SDValue();
13772 
13773  // If we're going to promote the non-output-value operand(s) or SELECT or
13774  // SELECT_CC, record them for truncation.
13775  if (User->getOpcode() == ISD::SELECT) {
13776  if (User->getOperand(0) == Inputs[i])
13777  SelectTruncOp[0].insert(std::make_pair(User,
13778  User->getOperand(0).getValueType()));
13779  } else if (User->getOpcode() == ISD::SELECT_CC) {
13780  if (User->getOperand(0) == Inputs[i])
13781  SelectTruncOp[0].insert(std::make_pair(User,
13782  User->getOperand(0).getValueType()));
13783  if (User->getOperand(1) == Inputs[i])
13784  SelectTruncOp[1].insert(std::make_pair(User,
13785  User->getOperand(1).getValueType()));
13786  }
13787  }
13788  }
13789 
13790  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13791  for (SDNode *User : PromOps[i].getNode()->uses()) {
13792  if (User != N && !Visited.count(User))
13793  return SDValue();
13794 
13795  // If we're going to promote the non-output-value operand(s) or SELECT or
13796  // SELECT_CC, record them for truncation.
13797  if (User->getOpcode() == ISD::SELECT) {
13798  if (User->getOperand(0) == PromOps[i])
13799  SelectTruncOp[0].insert(std::make_pair(User,
13800  User->getOperand(0).getValueType()));
13801  } else if (User->getOpcode() == ISD::SELECT_CC) {
13802  if (User->getOperand(0) == PromOps[i])
13803  SelectTruncOp[0].insert(std::make_pair(User,
13804  User->getOperand(0).getValueType()));
13805  if (User->getOperand(1) == PromOps[i])
13806  SelectTruncOp[1].insert(std::make_pair(User,
13807  User->getOperand(1).getValueType()));
13808  }
13809  }
13810  }
13811 
13812  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13813  bool ReallyNeedsExt = false;
13814  if (N->getOpcode() != ISD::ANY_EXTEND) {
13815  // If all of the inputs are not already sign/zero extended, then
13816  // we'll still need to do that at the end.
13817  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13818  if (isa<ConstantSDNode>(Inputs[i]))
13819  continue;
13820 
13821  unsigned OpBits =
13822  Inputs[i].getOperand(0).getValueSizeInBits();
13823  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13824 
13825  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13826  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13827  APInt::getHighBitsSet(OpBits,
13828  OpBits-PromBits))) ||
13829  (N->getOpcode() == ISD::SIGN_EXTEND &&
13830  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13831  (OpBits-(PromBits-1)))) {
13832  ReallyNeedsExt = true;
13833  break;
13834  }
13835  }
13836  }
13837 
13838  // Replace all inputs, either with the truncation operand, or a
13839  // truncation or extension to the final output type.
13840  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13841  // Constant inputs need to be replaced with the to-be-promoted nodes that
13842  // use them because they might have users outside of the cluster of
13843  // promoted nodes.
13844  if (isa<ConstantSDNode>(Inputs[i]))
13845  continue;
13846 
13847  SDValue InSrc = Inputs[i].getOperand(0);
13848  if (Inputs[i].getValueType() == N->getValueType(0))
13849  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13850  else if (N->getOpcode() == ISD::SIGN_EXTEND)
13851  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13852  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13853  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13854  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13855  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13856  else
13857  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13858  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13859  }
13860 
13861  std::list<HandleSDNode> PromOpHandles;
13862  for (auto &PromOp : PromOps)
13863  PromOpHandles.emplace_back(PromOp);
13864 
13865  // Replace all operations (these are all the same, but have a different
13866  // (promoted) return type). DAG.getNode will validate that the types of
13867  // a binary operator match, so go through the list in reverse so that
13868  // we've likely promoted both operands first.
13869  while (!PromOpHandles.empty()) {
13870  SDValue PromOp = PromOpHandles.back().getValue();
13871  PromOpHandles.pop_back();
13872 
13873  unsigned C;
13874  switch (PromOp.getOpcode()) {
13875  default: C = 0; break;
13876  case ISD::SELECT: C = 1; break;
13877  case ISD::SELECT_CC: C = 2; break;
13878  }
13879 
13880  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13881  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13882  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13883  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13884  // The to-be-promoted operands of this node have not yet been
13885  // promoted (this should be rare because we're going through the
13886  // list backward, but if one of the operands has several users in
13887  // this cluster of to-be-promoted nodes, it is possible).
13888  PromOpHandles.emplace_front(PromOp);
13889  continue;
13890  }
13891 
13892  // For SELECT and SELECT_CC nodes, we do a similar check for any
13893  // to-be-promoted comparison inputs.
13894  if (PromOp.getOpcode() == ISD::SELECT ||
13895  PromOp.getOpcode() == ISD::SELECT_CC) {
13896  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13897  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13898  (SelectTruncOp[1].count(PromOp.getNode()) &&
13899  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13900  PromOpHandles.emplace_front(PromOp);
13901  continue;
13902  }
13903  }
13904 
13905  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13906  PromOp.getNode()->op_end());
13907 
13908  // If this node has constant inputs, then they'll need to be promoted here.
13909  for (unsigned i = 0; i < 2; ++i) {
13910  if (!isa<ConstantSDNode>(Ops[C+i]))
13911  continue;
13912  if (Ops[C+i].getValueType() == N->getValueType(0))
13913  continue;
13914 
13915  if (N->getOpcode() == ISD::SIGN_EXTEND)
13916  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13917  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13918  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13919  else
13920  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13921  }
13922 
13923  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13924  // truncate them again to the original value type.
13925  if (PromOp.getOpcode() == ISD::SELECT ||
13926  PromOp.getOpcode() == ISD::SELECT_CC) {
13927  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13928  if (SI0 != SelectTruncOp[0].end())
13929  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13930  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13931  if (SI1 != SelectTruncOp[1].end())
13932  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13933  }
13934 
13935  DAG.ReplaceAllUsesOfValueWith(PromOp,
13936  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13937  }
13938 
13939  // Now we're left with the initial extension itself.
13940  if (!ReallyNeedsExt)
13941  return N->getOperand(0);
13942 
13943  // To zero extend, just mask off everything except for the first bit (in the
13944  // i1 case).
13945  if (N->getOpcode() == ISD::ZERO_EXTEND)
13946  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13948  N->getValueSizeInBits(0), PromBits),
13949  dl, N->getValueType(0)));
13950 
13951  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13952  "Invalid extension type");
13953  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13954  SDValue ShiftCst =
13955  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13956  return DAG.getNode(
13957  ISD::SRA, dl, N->getValueType(0),
13958  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13959  ShiftCst);
13960 }
13961 
13962 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13963  DAGCombinerInfo &DCI) const {
13964  assert(N->getOpcode() == ISD::SETCC &&
13965  "Should be called with a SETCC node");
13966 
13967  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13968  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13969  SDValue LHS = N->getOperand(0);
13970  SDValue RHS = N->getOperand(1);
13971 
13972  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13973  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13974  LHS.hasOneUse())
13975  std::swap(LHS, RHS);
13976 
13977  // x == 0-y --> x+y == 0
13978  // x != 0-y --> x+y != 0
13979  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13980  RHS.hasOneUse()) {
13981  SDLoc DL(N);
13982  SelectionDAG &DAG = DCI.DAG;
13983  EVT VT = N->getValueType(0);
13984  EVT OpVT = LHS.getValueType();
13985  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13986  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13987  }
13988  }
13989 
13990  return DAGCombineTruncBoolExt(N, DCI);
13991 }
13992 
13993 // Is this an extending load from an f32 to an f64?
13994 static bool isFPExtLoad(SDValue Op) {
13995  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13996  return LD->getExtensionType() == ISD::EXTLOAD &&
13997  Op.getValueType() == MVT::f64;
13998  return false;
13999 }
14000 
14001 /// Reduces the number of fp-to-int conversion when building a vector.
14002 ///
14003 /// If this vector is built out of floating to integer conversions,
14004 /// transform it to a vector built out of floating point values followed by a
14005 /// single floating to integer conversion of the vector.
14006 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14007 /// becomes (fptosi (build_vector ($A, $B, ...)))
14008 SDValue PPCTargetLowering::
14009 combineElementTruncationToVectorTruncation(SDNode *N,
14010  DAGCombinerInfo &DCI) const {
14011  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14012  "Should be called with a BUILD_VECTOR node");
14013 
14014  SelectionDAG &DAG = DCI.DAG;
14015  SDLoc dl(N);
14016 
14017  SDValue FirstInput = N->getOperand(0);
14018  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14019  "The input operand must be an fp-to-int conversion.");
14020 
14021  // This combine happens after legalization so the fp_to_[su]i nodes are
14022  // already converted to PPCSISD nodes.
14023  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14024  if (FirstConversion == PPCISD::FCTIDZ ||
14025  FirstConversion == PPCISD::FCTIDUZ ||
14026  FirstConversion == PPCISD::FCTIWZ ||
14027  FirstConversion == PPCISD::FCTIWUZ) {
14028  bool IsSplat = true;
14029  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14030  FirstConversion == PPCISD::FCTIWUZ;
14031  EVT SrcVT = FirstInput.getOperand(0).getValueType();
14033  EVT TargetVT = N->getValueType(0);
14034  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14035  SDValue NextOp = N->getOperand(i);
14036  if (NextOp.getOpcode() != PPCISD::MFVSR)
14037  return SDValue();
14038  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14039  if (NextConversion != FirstConversion)
14040  return SDValue();
14041  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14042  // This is not valid if the input was originally double precision. It is
14043  // also not profitable to do unless this is an extending load in which
14044  // case doing this combine will allow us to combine consecutive loads.
14045  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14046  return SDValue();
14047  if (N->getOperand(i) != FirstInput)
14048  IsSplat = false;
14049  }
14050 
14051  // If this is a splat, we leave it as-is since there will be only a single
14052  // fp-to-int conversion followed by a splat of the integer. This is better
14053  // for 32-bit and smaller ints and neutral for 64-bit ints.
14054  if (IsSplat)
14055  return SDValue();
14056 
14057  // Now that we know we have the right type of node, get its operands
14058  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14059  SDValue In = N->getOperand(i).getOperand(0);
14060  if (Is32Bit) {
14061  // For 32-bit values, we need to add an FP_ROUND node (if we made it
14062  // here, we know that all inputs are extending loads so this is safe).
14063  if (In.isUndef())
14064  Ops.push_back(DAG.getUNDEF(SrcVT));
14065  else {
14066  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
14067  MVT::f32, In.getOperand(0),
14068  DAG.getIntPtrConstant(1, dl));
14069  Ops.push_back(Trunc);
14070  }
14071  } else
14072  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14073  }
14074 
14075  unsigned Opcode;
14076  if (FirstConversion == PPCISD::FCTIDZ ||
14077  FirstConversion == PPCISD::FCTIWZ)
14079  else
14081 
14082  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14083  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14084  return DAG.getNode(Opcode, dl, TargetVT, BV);
14085  }
14086  return SDValue();
14087 }
14088 
14089 /// Reduce the number of loads when building a vector.
14090 ///
14091 /// Building a vector out of multiple loads can be converted to a load
14092 /// of the vector type if the loads are consecutive. If the loads are
14093 /// consecutive but in descending order, a shuffle is added at the end
14094 /// to reorder the vector.
14096  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14097  "Should be called with a BUILD_VECTOR node");
14098 
14099  SDLoc dl(N);
14100 
14101  // Return early for non byte-sized type, as they can't be consecutive.
14102  if (!N->getValueType(0).getVectorElementType().isByteSized())
14103  return SDValue();
14104 
14105  bool InputsAreConsecutiveLoads = true;
14106  bool InputsAreReverseConsecutive = true;
14107  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14108  SDValue FirstInput = N->getOperand(0);
14109  bool IsRoundOfExtLoad = false;
14110 
14111  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14112  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14113  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
14114  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
14115  }
14116  // Not a build vector of (possibly fp_rounded) loads.
14117  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14118  N->getNumOperands() == 1)
14119  return SDValue();
14120 
14121  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14122  // If any inputs are fp_round(extload), they all must be.
14123  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14124  return SDValue();
14125 
14126  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14127  N->getOperand(i);
14128  if (NextInput.getOpcode() != ISD::LOAD)
14129  return SDValue();
14130 
14131  SDValue PreviousInput =
14132  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14133  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14134  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14135 
14136  // If any inputs are fp_round(extload), they all must be.
14137  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14138  return SDValue();
14139 
14140  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14141  InputsAreConsecutiveLoads = false;
14142  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14143  InputsAreReverseConsecutive = false;
14144 
14145  // Exit early if the loads are neither consecutive nor reverse consecutive.
14146  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14147  return SDValue();
14148  }
14149 
14150  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14151  "The loads cannot be both consecutive and reverse consecutive.");
14152 
14153  SDValue FirstLoadOp =
14154  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
14155  SDValue LastLoadOp =
14156  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
14157  N->getOperand(N->getNumOperands()-1);
14158 
14159  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14160  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14161  if (InputsAreConsecutiveLoads) {
14162  assert(LD1 && "Input needs to be a LoadSDNode.");
14163  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14164  LD1->getBasePtr(), LD1->getPointerInfo(),
14165  LD1->getAlign());
14166  }
14167  if (InputsAreReverseConsecutive) {
14168  assert(LDL && "Input needs to be a LoadSDNode.");
14169  SDValue Load =
14170  DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
14171  LDL->getPointerInfo(), LDL->getAlign());
14173  for (int i = N->getNumOperands() - 1; i >= 0; i--)
14174  Ops.push_back(i);
14175 
14176  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14177  DAG.getUNDEF(N->getValueType(0)), Ops);
14178  }
14179  return SDValue();
14180 }
14181 
14182 // This function adds the required vector_shuffle needed to get
14183 // the elements of the vector extract in the correct position
14184 // as specified by the CorrectElems encoding.
14186  SDValue Input, uint64_t Elems,
14187  uint64_t CorrectElems) {
14188  SDLoc dl(N);
14189 
14190  unsigned NumElems = Input.getValueType().getVectorNumElements();
14191  SmallVector<int, 16> ShuffleMask(NumElems, -1);
14192 
14193  // Knowing the element indices being extracted from the original
14194  // vector and the order in which they're being inserted, just put
14195  // them at element indices required for the instruction.
14196  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14197  if (DAG.getDataLayout().isLittleEndian())
14198  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14199  else
14200  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14201  CorrectElems = CorrectElems >> 8;
14202  Elems = Elems >> 8;
14203  }
14204 
14205  SDValue Shuffle =
14206  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14207  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14208 
14209  EVT VT = N->getValueType(0);
14210  SDValue Conv = DAG.getBitcast(VT, Shuffle);
14211 
14212  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14213  Input.getValueType().getVectorElementType(),
14214  VT.getVectorNumElements());
14215  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14216  DAG.getValueType(ExtVT));
14217 }
14218 
14219 // Look for build vector patterns where input operands come from sign
14220 // extended vector_extract elements of specific indices. If the correct indices
14221 // aren't used, add a vector shuffle to fix up the indices and create
14222 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14223 // during instruction selection.
14225  // This array encodes the indices that the vector sign extend instructions
14226  // extract from when extending from one type to another for both BE and LE.
14227  // The right nibble of each byte corresponds to the LE incides.
14228  // and the left nibble of each byte corresponds to the BE incides.
14229  // For example: 0x3074B8FC byte->word
14230  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14231  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14232  // For example: 0x000070F8 byte->double word
14233  // For LE: the allowed indices are: 0x0,0x8
14234  // For BE: the allowed indices are: 0x7,0xF
14235  uint64_t TargetElems[] = {
14236  0x3074B8FC, // b->w
14237  0x000070F8, // b->d
14238  0x10325476, // h->w
14239  0x00003074, // h->d
14240  0x00001032, // w->d
14241  };
14242 
14243  uint64_t Elems = 0;
14244  int Index;
14245  SDValue Input;
14246 
14247  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14248  if (!Op)
14249  return false;
14250  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14251  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14252  return false;
14253 
14254  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14255  // of the right width.
14256  SDValue Extract = Op.getOperand(0);
14257  if (Extract.getOpcode() == ISD::ANY_EXTEND)
14258  Extract = Extract.getOperand(0);
14259  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14260  return false;
14261 
14262  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14263  if (!ExtOp)
14264  return false;
14265 
14266  Index = ExtOp->getZExtValue();
14267  if (Input && Input != Extract.getOperand(0))
14268  return false;
14269 
14270  if (!Input)
14271  Input = Extract.getOperand(0);
14272 
14273  Elems = Elems << 8;
14274  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14275  Elems |= Index;
14276 
14277  return true;
14278  };
14279 
14280  // If the build vector operands aren't sign extended vector extracts,
14281  // of the same input vector, then return.
14282  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14283  if (!isSExtOfVecExtract(N->getOperand(i))) {
14284  return SDValue();
14285  }
14286  }
14287 
14288  // If the vector extract indicies are not correct, add the appropriate
14289  // vector_shuffle.
14290  int TgtElemArrayIdx;
14291  int InputSize = Input.getValueType().getScalarSizeInBits();
14292  int OutputSize = N->getValueType(0).getScalarSizeInBits();
14293  if (InputSize + OutputSize == 40)
14294  TgtElemArrayIdx = 0;
14295  else if (InputSize + OutputSize == 72)
14296  TgtElemArrayIdx = 1;
14297  else if (InputSize + OutputSize == 48)
14298  TgtElemArrayIdx = 2;
14299  else if (InputSize + OutputSize == 80)
14300  TgtElemArrayIdx = 3;
14301  else if (InputSize + OutputSize == 96)
14302  TgtElemArrayIdx = 4;
14303  else
14304  return SDValue();
14305 
14306  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14307  CorrectElems = DAG.getDataLayout().isLittleEndian()
14308  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14309  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14310  if (Elems != CorrectElems) {
14311  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14312  }
14313 
14314  // Regular lowering will catch cases where a shuffle is not needed.
14315  return SDValue();
14316 }
14317 
14318 // Look for the pattern of a load from a narrow width to i128, feeding
14319 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14320 // (LXVRZX). This node represents a zero extending load that will be matched
14321 // to the Load VSX Vector Rightmost instructions.
14323  SDLoc DL(N);
14324 
14325  // This combine is only eligible for a BUILD_VECTOR of v1i128.
14326  if (N->getValueType(0) != MVT::v1i128)
14327  return SDValue();
14328 
14329  SDValue Operand = N->getOperand(0);
14330  // Proceed with the transformation if the operand to the BUILD_VECTOR
14331  // is a load instruction.
14332  if (Operand.getOpcode() != ISD::LOAD)
14333  return SDValue();
14334 
14335  auto *LD = cast<LoadSDNode>(Operand);
14336  EVT MemoryType = LD->getMemoryVT();
14337 
14338  // This transformation is only valid if the we are loading either a byte,
14339  // halfword, word, or doubleword.
14340  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14342 
14343  // Ensure that the load from the narrow width is being zero extended to i128.
14344  if (!ValidLDType ||
14345  (LD->getExtensionType() != ISD::ZEXTLOAD &&
14346  LD->getExtensionType() != ISD::EXTLOAD))
14347  return SDValue();
14348 
14349  SDValue LoadOps[] = {
14350  LD->getChain(), LD->getBasePtr(),
14351  DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14352 
14355  LoadOps, MemoryType, LD->getMemOperand());
14356 }
14357 
14358 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14359  DAGCombinerInfo &DCI) const {
14360  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14361  "Should be called with a BUILD_VECTOR node");
14362 
14363  SelectionDAG &DAG = DCI.DAG;
14364  SDLoc dl(N);
14365 
14366  if (!Subtarget.hasVSX())
14367  return SDValue();
14368 
14369  // The target independent DAG combiner will leave a build_vector of
14370  // float-to-int conversions intact. We can generate MUCH better code for
14371  // a float-to-int conversion of a vector of floats.
14372  SDValue FirstInput = N->getOperand(0);
14373  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14374  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14375  if (Reduced)
14376  return Reduced;
14377  }
14378 
14379  // If we're building a vector out of consecutive loads, just load that
14380  // vector type.
14381  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14382  if (Reduced)
14383  return Reduced;
14384 
14385  // If we're building a vector out of extended elements from another vector
14386  // we have P9 vector integer extend instructions. The code assumes legal
14387  // input types (i.e. it can't handle things like v4i16) so do not run before
14388  // legalization.
14389  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14390  Reduced = combineBVOfVecSExt(N, DAG);
14391  if (Reduced)
14392  return Reduced;
14393  }
14394 
14395  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14396  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14397  // is a load from <valid narrow width> to i128.
14398  if (Subtarget.isISA3_1()) {
14399  SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14400  if (BVOfZLoad)
14401  return BVOfZLoad;
14402  }
14403 
14404  if (N->getValueType(0) != MVT::v2f64)
14405  return SDValue();
14406 
14407  // Looking for:
14408  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14409  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14410  FirstInput.getOpcode() != ISD::UINT_TO_FP)
14411  return SDValue();
14412  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14413  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14414  return SDValue();
14415  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14416  return SDValue();
14417 
14418  SDValue Ext1 = FirstInput.getOperand(0);
14419  SDValue Ext2 = N->getOperand(1).getOperand(0);
14420  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14422  return SDValue();
14423 
14424  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14425  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14426  if (!Ext1Op || !Ext2Op)
14427  return SDValue();
14428  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14429  Ext1.getOperand(0) != Ext2.getOperand(0))
14430  return SDValue();
14431 
14432  int FirstElem = Ext1Op->getZExtValue();
14433  int SecondElem = Ext2Op->getZExtValue();
14434  int SubvecIdx;
14435  if (FirstElem == 0 && SecondElem == 1)
14436  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14437  else if (FirstElem == 2 && SecondElem == 3)
14438  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14439  else
14440  return SDValue();
14441 
14442  SDValue SrcVec = Ext1.getOperand(0);
14443  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14445  return DAG.getNode(NodeType, dl, MVT::v2f64,
14446  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14447 }
14448 
14449 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14450  DAGCombinerInfo &DCI) const {
14451  assert((N->getOpcode() == ISD::SINT_TO_FP ||
14452  N->getOpcode() == ISD::UINT_TO_FP) &&
14453  "Need an int -> FP conversion node here");
14454 
14455  if (useSoftFloat() || !Subtarget.has64BitSupport())
14456  return SDValue();
14457 
14458  SelectionDAG &DAG = DCI.DAG;
14459  SDLoc dl(N);
14460  SDValue Op(N, 0);
14461 
14462  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14463  // from the hardware.
14464  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14465  return SDValue();
14466  if (!Op.getOperand(0).getValueType().isSimple())
14467  return SDValue();
14468  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14469  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14470  return SDValue();
14471 
14472  SDValue FirstOperand(Op.getOperand(0));
14473  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14474  (FirstOperand.getValueType() == MVT::i8 ||
14475  FirstOperand.getValueType() == MVT::i16);
14476  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14477  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14478  bool DstDouble = Op.getValueType() == MVT::f64;
14479  unsigned ConvOp = Signed ?
14480  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14481  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14482  SDValue WidthConst =
14483  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14484  dl, false);
14485  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14486  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14489  Ops, MVT::i8, LDN->getMemOperand());
14490 
14491  // For signed conversion, we need to sign-extend the value in the VSR
14492  if (Signed) {
14493  SDValue ExtOps[] = { Ld, WidthConst };
14494  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14495  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14496  } else
14497  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14498  }
14499 
14500 
14501  // For i32 intermediate values, unfortunately, the conversion functions
14502  // leave the upper 32 bits of the value are undefined. Within the set of
14503  // scalar instructions, we have no method for zero- or sign-extending the
14504  // value. Thus, we cannot handle i32 intermediate values here.
14505  if (Op.getOperand(0).getValueType() == MVT::i32)
14506  return SDValue();
14507 
14508  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14509  "UINT_TO_FP is supported only with FPCVT");
14510 
14511  // If we have FCFIDS, then use it when converting to single-precision.
14512  // Otherwise, convert to double-precision and then round.
14513  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14514  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14515  : PPCISD::FCFIDS)
14516  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14517  : PPCISD::FCFID);
14518  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14519  ? MVT::f32
14520  : MVT::f64;
14521 
14522  // If we're converting from a float, to an int, and back to a float again,
14523  // then we don't need the store/load pair at all.
14524  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14525  Subtarget.hasFPCVT()) ||
14526  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14527  SDValue Src = Op.getOperand(0).getOperand(0);
14528  if (Src.getValueType() == MVT::f32) {
14529  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14530  DCI.AddToWorklist(Src.getNode());
14531  } else if (Src.getValueType() != MVT::f64) {
14532  // Make sure that we don't pick up a ppc_fp128 source value.
14533  return SDValue();
14534  }
14535 
14536  unsigned FCTOp =
14537  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14539 
14540  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14541  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14542 
14543  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14544  FP = DAG.getNode(ISD::FP_ROUND, dl,
14545  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14546  DCI.AddToWorklist(FP.getNode());
14547  }
14548 
14549  return FP;
14550  }
14551 
14552  return SDValue();
14553 }
14554 
14555 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14556 // builtins) into loads with swaps.
14558  DAGCombinerInfo &DCI) const {
14559  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
14560  // load combines.
14561  if (DCI.isBeforeLegalizeOps())
14562  return SDValue();
14563 
14564  SelectionDAG &DAG = DCI.DAG;
14565  SDLoc dl(N);
14566  SDValue Chain;
14567  SDValue Base;
14568  MachineMemOperand *MMO;
14569 
14570  switch (N->getOpcode()) {
14571  default:
14572  llvm_unreachable("Unexpected opcode for little endian VSX load");
14573  case ISD::LOAD: {
14574  LoadSDNode *LD = cast<LoadSDNode>(N);
14575  Chain = LD->getChain();
14576  Base = LD->getBasePtr();
14577  MMO = LD->getMemOperand();
14578  // If the MMO suggests this isn't a load of a full vector, leave
14579  // things alone. For a built-in, we have to make the change for
14580  // correctness, so if there is a size problem that will be a bug.
14581  if (MMO->getSize() < 16)
14582  return SDValue();
14583  break;
14584  }
14585  case ISD::INTRINSIC_W_CHAIN: {
14586  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14587  Chain = Intrin->getChain();
14588  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14589  // us what we want. Get operand 2 instead.
14590  Base = Intrin->getOperand(2);
14591  MMO = Intrin->getMemOperand();
14592  break;
14593  }
14594  }
14595 
14596  MVT VecTy = N->getValueType(0).getSimpleVT();
14597 
14598  SDValue LoadOps[] = { Chain, Base };
14601  LoadOps, MVT::v2f64, MMO);
14602 
14603  DCI.AddToWorklist(Load.getNode());
14604  Chain = Load.getValue(1);
14605  SDValue Swap = DAG.getNode(
14606  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14607  DCI.AddToWorklist(Swap.getNode());
14608 
14609  // Add a bitcast if the resulting load type doesn't match v2f64.
14610  if (VecTy != MVT::v2f64) {
14611  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14612  DCI.AddToWorklist(N.getNode());
14613  // Package {bitcast value, swap's chain} to match Load's shape.
14614  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14615  N, Swap.getValue(1));
14616  }
14617 
14618  return Swap;
14619 }
14620 
14621 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14622 // builtins) into stores with swaps.
14624  DAGCombinerInfo &DCI) const {
14625  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
14626  // store combines.
14627  if (DCI.isBeforeLegalizeOps())
14628  return SDValue();
14629 
14630  SelectionDAG &DAG = DCI.DAG;
14631  SDLoc dl(N);
14632  SDValue Chain;
14633  SDValue Base;
14634  unsigned SrcOpnd;
14635  MachineMemOperand *MMO;
14636 
14637  switch (N->getOpcode()) {
14638  default:
14639  llvm_unreachable("Unexpected opcode for little endian VSX store");
14640  case ISD::STORE: {
14641  StoreSDNode *ST = cast<StoreSDNode>(N);
14642  Chain = ST->getChain();
14643  Base = ST->getBasePtr();
14644  MMO = ST->getMemOperand();
14645  SrcOpnd = 1;
14646  // If the MMO suggests this isn't a store of a full vector, leave
14647  // things alone. For a built-in, we have to make the change for
14648  // correctness, so if there is a size problem that will be a bug.
14649  if (MMO->getSize() < 16)
14650  return SDValue();
14651  break;
14652  }
14653  case ISD::INTRINSIC_VOID: {
14654  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14655  Chain = Intrin->getChain();
14656  // Intrin->getBasePtr() oddly does not get what we want.
14657  Base = Intrin->getOperand(3);
14658  MMO = Intrin->getMemOperand();
14659  SrcOpnd = 2;
14660  break;
14661  }
14662  }
14663 
14664  SDValue Src = N->getOperand(SrcOpnd);
14665  MVT VecTy = Src.getValueType().getSimpleVT();
14666 
14667  // All stores are done as v2f64 and possible bit cast.
14668  if (VecTy != MVT::v2f64) {
14669  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14670  DCI.AddToWorklist(Src.getNode());
14671  }
14672 
14673  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14674  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14675  DCI.AddToWorklist(Swap.getNode());
14676  Chain = Swap.getValue(1);
14677  SDValue StoreOps[] = { Chain, Swap, Base };
14679  DAG.getVTList(MVT::Other),
14680  StoreOps, VecTy, MMO);
14681  DCI.AddToWorklist(Store.getNode());
14682  return Store;
14683 }
14684 
14685 // Handle DAG combine for STORE (FP_TO_INT F).
14686 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14687  DAGCombinerInfo &DCI) const {
14688 
14689  SelectionDAG &DAG = DCI.DAG;
14690  SDLoc dl(N);
14691  unsigned Opcode = N->getOperand(1).getOpcode();
14692 
14694  && "Not a FP_TO_INT Instruction!");
14695 
14696  SDValue Val = N->getOperand(1).getOperand(0);
14697  EVT Op1VT = N->getOperand(1).getValueType();
14698  EVT ResVT = Val.getValueType();
14699 
14700  if (!isTypeLegal(ResVT))
14701  return SDValue();
14702 
14703  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14704  bool ValidTypeForStoreFltAsInt =
14705  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14706  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14707 
14708  if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14709  return SDValue();
14710 
14711  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14712  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14713  return SDValue();
14714 
14715  // Extend f32 values to f64
14716  if (ResVT.getScalarSizeInBits() == 32) {
14717  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14718  DCI.AddToWorklist(Val.getNode());
14719  }
14720 
14721  // Set signed or unsigned conversion opcode.
14722  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14725 
14726  Val = DAG.getNode(ConvOpcode,
14727  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14728  DCI.AddToWorklist(Val.getNode());
14729 
14730  // Set number of bytes being converted.
14731  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14732  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14733  DAG.getIntPtrConstant(ByteSize, dl, false),
14734  DAG.getValueType(Op1VT) };
14735 
14737  DAG.getVTList(MVT::Other), Ops,
14738  cast<StoreSDNode>(N)->getMemoryVT(),
14739  cast<StoreSDNode>(N)->getMemOperand());
14740 
14741  DCI.AddToWorklist(Val.getNode());
14742  return Val;
14743 }
14744 
14745 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14746  // Check that the source of the element keeps flipping
14747  // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14748  bool PrevElemFromFirstVec = Mask[0] < NumElts;
14749  for (int i = 1, e = Mask.size(); i < e; i++) {
14750  if (PrevElemFromFirstVec && Mask[i] < NumElts)
14751  return false;
14752  if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14753  return false;
14754  PrevElemFromFirstVec = !PrevElemFromFirstVec;
14755  }
14756  return true;
14757 }
14758 
14759 static bool isSplatBV(SDValue Op) {
14760  if (Op.getOpcode() != ISD::BUILD_VECTOR)
14761  return false;
14762  SDValue FirstOp;
14763 
14764  // Find first non-undef input.
14765  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14766  FirstOp = Op.getOperand(i);
14767  if (!FirstOp.isUndef())
14768  break;
14769  }
14770 
14771  // All inputs are undef or the same as the first non-undef input.
14772  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14773  if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14774  return false;
14775  return true;
14776 }
14777 
14779  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14780  return Op;
14781  if (Op.getOpcode() != ISD::BITCAST)
14782  return SDValue();
14783  Op = Op.getOperand(0);
14784  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14785  return Op;
14786  return SDValue();
14787 }
14788 
14789 // Fix up the shuffle mask to account for the fact that the result of
14790 // scalar_to_vector is not in lane zero. This just takes all values in
14791 // the ranges specified by the min/max indices and adds the number of
14792 // elements required to ensure each element comes from the respective
14793 // position in the valid lane.
14794 // On little endian, that's just the corresponding element in the other
14795 // half of the vector. On big endian, it is in the same half but right
14796 // justified rather than left justified in that half.
14798  int LHSMaxIdx, int RHSMinIdx,
14799  int RHSMaxIdx, int HalfVec,
14800  unsigned ValidLaneWidth,
14801  const PPCSubtarget &Subtarget) {
14802  for (int i = 0, e = ShuffV.size(); i < e; i++) {
14803  int Idx = ShuffV[i];
14804  if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14805  ShuffV[i] +=
14806  Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14807  }
14808 }
14809 
14810 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14811 // the original is:
14812 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14813 // In such a case, just change the shuffle mask to extract the element
14814 // from the permuted index.
14816  const PPCSubtarget &Subtarget) {
14817  SDLoc dl(OrigSToV);
14818  EVT VT = OrigSToV.getValueType();
14819  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14820  "Expecting a SCALAR_TO_VECTOR here");
14821  SDValue Input = OrigSToV.getOperand(0);
14822 
14823  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14824  ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14825  SDValue OrigVector = Input.getOperand(0);
14826 
14827  // Can't handle non-const element indices or different vector types
14828  // for the input to the extract and the output of the scalar_to_vector.
14829  if (Idx && VT == OrigVector.getValueType()) {
14830  unsigned NumElts = VT.getVectorNumElements();
14831  assert(
14832  NumElts > 1 &&
14833  "Cannot produce a permuted scalar_to_vector for one element vector");
14834  SmallVector<int, 16> NewMask(NumElts, -1);
14835  unsigned ResultInElt = NumElts / 2;
14836  ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14837  NewMask[ResultInElt] = Idx->getZExtValue();
14838  return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14839  }
14840  }
14841  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14842  OrigSToV.getOperand(0));
14843 }
14844 
14845 // On little endian subtargets, combine shuffles such as:
14846 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14847 // into:
14848 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14849 // because the latter can be matched to a single instruction merge.
14850 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14851 // to put the value into element zero. Adjust the shuffle mask so that the
14852 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14853 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
14854 // nodes with elements smaller than doubleword because all the ways
14855 // of getting scalar data into a vector register put the value in the
14856 // rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Default result: the shuffle node itself, unmodified.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  // ShuffV is the working copy of the mask that the fixups below mutate.
  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
    // same type and have differing element sizes, then do not perform
    // the following transformation. The current transformation for
    // SCALAR_TO_VECTOR assumes that both input vectors have the same
    // element size. This will be updated in the future to account for
    // differing sizes of the LHS and RHS.
    if (SToVLHS && SToVRHS &&
        (SToVLHS.getValueType().getScalarSizeInBits() !=
         SToVRHS.getValueType().getScalarSizeInBits()))
      return Res;

    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast.
    unsigned ValidLaneWidth =
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
                      LHS.getValueType().getScalarSizeInBits()
                : SToVRHS.getValueType().getScalarSizeInBits() /
                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. A value of -1 means "no mask entry
    // matches" in fixupShuffleMaskForPermutedSToV below.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    if (SToVLHS) {
      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // RHS mask entries start at NumEltsOut in a combined two-vector mask.
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec, ValidLaneWidth, Subtarget);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // On LE the splat (if any) sits in the RHS after canonicalization; on BE
  // it sits in the LHS.
  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  // NOTE(review): the partner entry (ShuffV[i-1] / ShuffV[i+1]) is assumed
  // to be defined (>= 0) here; an undef partner would produce an arbitrary
  // but in-range splat index — confirm this is benign since the splat has
  // identical elements in every lane.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        ShuffV[i] = (ShuffV[i - 1] + NumElts);
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        ShuffV[i] = (ShuffV[i + 1] + NumElts);
      }
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        ShuffV[i] = ShuffV[i + 1] - NumElts;
      }
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
        if (ShuffV[i] < 0)
          continue;
        ShuffV[i] = ShuffV[i - 1] - NumElts;
      }
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
15030 
15031 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15032  LSBaseSDNode *LSBase,
15033  DAGCombinerInfo &DCI) const {
15034  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15035  "Not a reverse memop pattern!");
15036 
15037  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15038  auto Mask = SVN->getMask();
15039  int i = 0;
15040  auto I = Mask.rbegin();
15041  auto E = Mask.rend();
15042 
15043  for (; I != E; ++I) {
15044  if (*I != i)
15045  return false;
15046  i++;
15047  }
15048  return true;
15049  };
15050 
15051  SelectionDAG &DAG = DCI.DAG;
15052  EVT VT = SVN->getValueType(0);
15053 
15054  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15055  return SDValue();
15056 
15057  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
15058  // See comment in PPCVSXSwapRemoval.cpp.
15059  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
15060  if (!Subtarget.hasP9Vector())
15061  return SDValue();
15062 
15063  if(!IsElementReverse(SVN))
15064  return SDValue();
15065 
15066  if (LSBase->getOpcode() == ISD::LOAD) {
15067  // If the load return value 0 has more than one user except the
15068  // shufflevector instruction, it is not profitable to replace the
15069  // shufflevector with a reverse load.
15070  for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15071  UI != UE; ++UI)
15072  if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15073  return SDValue();
15074 
15075  SDLoc dl(LSBase);
15076  SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15077  return DAG.getMemIntrinsicNode(
15078  PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15079  LSBase->getMemoryVT(), LSBase->getMemOperand());
15080  }
15081 
15082  if (LSBase->getOpcode() == ISD::STORE) {
15083  // If there are other uses of the shuffle, the swap cannot be avoided.
15084  // Forcing the use of an X-Form (since swapped stores only have
15085  // X-Forms) without removing the swap is unprofitable.
15086  if (!SVN->hasOneUse())
15087  return SDValue();
15088 
15089  SDLoc dl(LSBase);
15090  SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15091  LSBase->getBasePtr()};
15092  return DAG.getMemIntrinsicNode(
15093  PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15094  LSBase->getMemoryVT(), LSBase->getMemOperand());
15095  }
15096 
15097  llvm_unreachable("Expected a load or store node here");
15098 }
15099 
15101  DAGCombinerInfo &DCI) const {
15102  SelectionDAG &DAG = DCI.DAG;
15103  SDLoc dl(N);
15104  switch (N->getOpcode()) {
15105  default: break;
15106  case ISD::ADD:
15107  return combineADD(N, DCI);
15108  case ISD::SHL:
15109  return combineSHL(N, DCI);
15110  case ISD::SRA:
15111  return combineSRA(N, DCI);
15112  case ISD::SRL:
15113  return combineSRL(N, DCI);
15114  case ISD::MUL:
15115  return combineMUL(N, DCI);
15116  case ISD::FMA:
15117  case PPCISD::FNMSUB:
15118  return combineFMALike(N, DCI);
15119  case PPCISD::SHL:
15120  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15121  return N->getOperand(0);
15122  break;
15123  case PPCISD::SRL:
15124  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15125  return N->getOperand(0);
15126  break;
15127  case PPCISD::SRA:
15128  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15129  if (C->isZero() || // 0 >>s V -> 0.
15130  C->isAllOnes()) // -1 >>s V -> -1.
15131  return N->getOperand(0);
15132  }
15133  break;
15134  case ISD::SIGN_EXTEND:
15135  case ISD::ZERO_EXTEND:
15136  case ISD::ANY_EXTEND:
15137  return DAGCombineExtBoolTrunc(N, DCI);
15138  case ISD::TRUNCATE:
15139  return combineTRUNCATE(N, DCI);
15140  case ISD::SETCC:
15141  if (SDValue CSCC = combineSetCC(N, DCI))
15142  return CSCC;
15144  case ISD::SELECT_CC:
15145  return DAGCombineTruncBoolExt(N, DCI);
15146  case ISD::SINT_TO_FP:
15147  case ISD::UINT_TO_FP:
15148  return combineFPToIntToFP(N, DCI);
15149  case ISD::VECTOR_SHUFFLE:
15150  if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15151  LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15152  return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15153  }
15154  return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15155  case ISD::STORE: {
15156 
15157  EVT Op1VT = N->getOperand(1).getValueType();
15158  unsigned Opcode = N->getOperand(1).getOpcode();
15159 
15161  SDValue Val= combineStoreFPToInt(N, DCI);
15162  if (Val)
15163  return Val;
15164  }
15165 
15167  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15168  SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15169  if (Val)
15170  return Val;
15171  }
15172 
15173  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15174  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15175  N->getOperand(1).getNode()->hasOneUse() &&
15176  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15177  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15178 
15179  // STBRX can only handle simple types and it makes no sense to store less
15180  // two bytes in byte-reversed order.
15181  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15182  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15183  break;
15184 
15185  SDValue BSwapOp = N->getOperand(1).getOperand(0);
15186  // Do an any-extend to 32-bits if this is a half-word input.
15187  if (BSwapOp.getValueType() == MVT::i16)
15188  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15189 
15190  // If the type of BSWAP operand is wider than stored memory width
15191  // it need to be shifted to the right side before STBRX.
15192  if (Op1VT.bitsGT(mVT)) {
15193  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15194  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15195  DAG.getConstant(Shift, dl, MVT::i32));
15196  // Need to truncate if this is a bswap of i64 stored as i32/i16.
15197  if (Op1VT == MVT::i64)
15198  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15199  }
15200 
15201  SDValue Ops[] = {
15202  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15203  };
15204  return
15206  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15207  cast<StoreSDNode>(N)->getMemOperand());
15208  }
15209 
15210  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15211  // So it can increase the chance of CSE constant construction.
15212  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15213  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15214  // Need to sign-extended to 64-bits to handle negative values.
15215  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15216  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15217  MemVT.getSizeInBits());
15218  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15219 
15220  // DAG.getTruncStore() can't be used here because it doesn't accept
15221  // the general (base + offset) addressing mode.
15222  // So we use UpdateNodeOperands and setTruncatingStore instead.
15223  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15224  N->getOperand(3));
15225  cast<StoreSDNode>(N)->setTruncatingStore(true);
15226  return SDValue(N, 0);
15227  }
15228 
15229  // For little endian, VSX stores require generating xxswapd/lxvd2x.
15230  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15231  if (Op1VT.isSimple()) {
15232  MVT StoreVT = Op1VT.getSimpleVT();
15233  if (Subtarget.needsSwapsForVSXMemOps() &&
15234  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15235  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15236  return expandVSXStoreForLE(N, DCI);
15237  }
15238  break;
15239  }
15240  case ISD::LOAD: {
15241  LoadSDNode *LD = cast<LoadSDNode>(N);
15242  EVT VT = LD->getValueType(0);
15243 
15244  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15245  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15246  if (VT.isSimple()) {
15247  MVT LoadVT = VT.getSimpleVT();
15248  if (Subtarget.needsSwapsForVSXMemOps() &&
15249  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15250  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15251  return expandVSXLoadForLE(N, DCI);
15252  }
15253 
15254  // We sometimes end up with a 64-bit integer load, from which we extract
15255  // two single-precision floating-point numbers. This happens with
15256  // std::complex<float>, and other similar structures, because of the way we
15257  // canonicalize structure copies. However, if we lack direct moves,
15258  // then the final bitcasts from the extracted integer values to the
15259  // floating-point numbers turn into store/load pairs. Even with direct moves,
15260  // just loading the two floating-point numbers is likely better.
15261  auto ReplaceTwoFloatLoad = [&]() {
15262  if (VT != MVT::i64)
15263  return false;
15264 
15265  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15266  LD->isVolatile())
15267  return false;
15268 
15269  // We're looking for a sequence like this:
15270  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15271  // t16: i64 = srl t13, Constant:i32<32>
15272  // t17: i32 = truncate t16
15273  // t18: f32 = bitcast t17
15274  // t19: i32 = truncate t13
15275  // t20: f32 = bitcast t19
15276 
15277  if (!LD->hasNUsesOfValue(2, 0))
15278  return false;
15279 
15280  auto UI = LD->use_begin();
15281  while (UI.getUse().getResNo() != 0) ++UI;
15282  SDNode *Trunc = *UI++;
15283  while (UI.getUse().getResNo() != 0) ++UI;
15284  SDNode *RightShift = *UI;
15285  if (Trunc->getOpcode() != ISD::TRUNCATE)
15286  std::swap(Trunc, RightShift);
15287 
15288  if (Trunc->getOpcode() != ISD::TRUNCATE ||
15289  Trunc->getValueType(0) != MVT::i32 ||
15290  !Trunc->hasOneUse())
15291  return false;
15292  if (RightShift->getOpcode() != ISD::SRL ||
15293  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15294  RightShift->getConstantOperandVal(1) != 32 ||
15295  !RightShift->hasOneUse())
15296  return false;
15297 
15298  SDNode *Trunc2 = *RightShift->use_begin();
15299  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15300  Trunc2->getValueType(0) != MVT::i32 ||
15301  !Trunc2->hasOneUse())
15302  return false;
15303 
15304  SDNode *Bitcast = *Trunc->use_begin();
15305  SDNode *Bitcast2 = *Trunc2->use_begin();
15306 
15307  if (Bitcast->getOpcode() != ISD::BITCAST ||
15308  Bitcast->getValueType(0) != MVT::f32)
15309  return false;
15310  if (Bitcast2->getOpcode() != ISD::BITCAST ||
15311  Bitcast2->getValueType(0) != MVT::f32)
15312  return false;
15313 
15314  if (Subtarget.isLittleEndian())
15315  std::swap(Bitcast, Bitcast2);
15316 
15317  // Bitcast has the second float (in memory-layout order) and Bitcast2
15318  // has the first one.
15319 
15320  SDValue BasePtr = LD->getBasePtr();
15321  if (LD->isIndexed()) {
15322  assert(LD->getAddressingMode() == ISD::PRE_INC &&
15323  "Non-pre-inc AM on PPC?");
15324  BasePtr =
15325  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15326  LD->getOffset());
15327  }
15328 
15329  auto MMOFlags =
15330  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15331  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15332  LD->getPointerInfo(), LD->getAlign(),
15333  MMOFlags, LD->getAAInfo());
15334  SDValue AddPtr =
15335  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15336  BasePtr, DAG.getIntPtrConstant(4, dl));
15337  SDValue FloatLoad2 = DAG.getLoad(
15338  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15339  LD->getPointerInfo().getWithOffset(4),
15340  commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15341 
15342  if (LD->isIndexed()) {
15343  // Note that DAGCombine should re-form any pre-increment load(s) from
15344  // what is produced here if that makes sense.
15345  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15346  }
15347 
15348  DCI.CombineTo(Bitcast2, FloatLoad);
15349  DCI.CombineTo(Bitcast, FloatLoad2);
15350 
15351  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15352  SDValue(FloatLoad2.getNode(), 1));
15353  return true;
15354  };
15355 
15356  if (ReplaceTwoFloatLoad())
15357  return SDValue(N, 0);
15358 
15359  EVT MemVT = LD->getMemoryVT();
15360  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15361  Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15362  if (LD->isUnindexed() && VT.isVector() &&
15363  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15364  // P8 and later hardware should just use LOAD.
15365  !Subtarget.hasP8Vector() &&
15366  (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15367  VT == MVT::v4f32))) &&
15368  LD->getAlign() < ABIAlignment) {
15369  // This is a type-legal unaligned Altivec load.
15370  SDValue Chain = LD->getChain();
15371  SDValue Ptr = LD->getBasePtr();
15372  bool isLittleEndian = Subtarget.isLittleEndian();
15373 
15374  // This implements the loading of unaligned vectors as described in
15375  // the venerable Apple Velocity Engine overview. Specifically:
15376  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15377  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15378  //
15379  // The general idea is to expand a sequence of one or more unaligned
15380  // loads into an alignment-based permutation-control instruction (lvsl
15381  // or lvsr), a series of regular vector loads (which always truncate
15382  // their input address to an aligned address), and a series of
15383  // permutations. The results of these permutations are the requested
15384  // loaded values. The trick is that the last "extra" load is not taken
15385  // from the address you might suspect (sizeof(vector) bytes after the
15386  // last requested load), but rather sizeof(vector) - 1 bytes after the
15387  // last requested vector. The point of this is to avoid a page fault if
15388  // the base address happened to be aligned. This works because if the
15389  // base address is aligned, then adding less than a full vector length
15390  // will cause the last vector in the sequence to be (re)loaded.
15391  // Otherwise, the next vector will be fetched as you might suspect was
15392  // necessary.
15393 
15394  // We might be able to reuse the permutation generation from
15395  // a different base address offset from this one by an aligned amount.
15396  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15397  // optimization later.
15398  Intrinsic::ID Intr, IntrLD, IntrPerm;
15399  MVT PermCntlTy, PermTy, LDTy;
15400  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15401  : Intrinsic::ppc_altivec_lvsl;
15402  IntrLD = Intrinsic::ppc_altivec_lvx;
15403  IntrPerm = Intrinsic::ppc_altivec_vperm;
15404  PermCntlTy = MVT::v16i8;
15405  PermTy = MVT::v4i32;
15406  LDTy = MVT::v4i32;
15407 
15408  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15409 
15410  // Create the new MMO for the new base load. It is like the original MMO,
15411  // but represents an area in memory almost twice the vector size centered
15412  // on the original address. If the address is unaligned, we might start
15413  // reading up to (sizeof(vector)-1) bytes below the address of the
15414  // original unaligned load.
15415  MachineFunction &MF = DAG.getMachineFunction();
15416  MachineMemOperand *BaseMMO =
15417  MF.getMachineMemOperand(LD->getMemOperand(),
15418  -(long)MemVT.getStoreSize()+1,
15419  2*MemVT.getStoreSize()-1);
15420 
15421  // Create the new base load.
15422  SDValue LDXIntID =
15423  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15424  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15425  SDValue BaseLoad =
15427  DAG.getVTList(PermTy, MVT::Other),
15428  BaseLoadOps, LDTy, BaseMMO);
15429 
15430  // Note that the value of IncOffset (which is provided to the next
15431  // load's pointer info offset value, and thus used to calculate the
15432  // alignment), and the value of IncValue (which is actually used to
15433  // increment the pointer value) are different! This is because we
15434  // require the next load to appear to be aligned, even though it
15435  // is actually offset from the base pointer by a lesser amount.
15436  int IncOffset = VT.getSizeInBits() / 8;
15437  int IncValue = IncOffset;
15438 
15439  // Walk (both up and down) the chain looking for another load at the real
15440  // (aligned) offset (the alignment of the other load does not matter in
15441  // this case). If found, then do not use the offset reduction trick, as
15442  // that will prevent the loads from being later combined (as they would
15443  // otherwise be duplicates).
15444  if (!findConsecutiveLoad(LD, DAG))
15445  --IncValue;
15446 
15447  SDValue Increment =
15448  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15449  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15450 
15451  MachineMemOperand *ExtraMMO =
15452  MF.getMachineMemOperand(LD->getMemOperand(),
15453  1, 2*MemVT.getStoreSize()-1);
15454  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15455  SDValue ExtraLoad =
15457  DAG.getVTList(PermTy, MVT::Other),
15458  ExtraLoadOps, LDTy, ExtraMMO);
15459 
15461  BaseLoad.getValue(1), ExtraLoad.getValue(1));
15462 
15463  // Because vperm has a big-endian bias, we must reverse the order
15464  // of the input vectors and complement the permute control vector
15465  // when generating little endian code. We have already handled the
15466  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15467  // and ExtraLoad here.
15468  SDValue Perm;
15469  if (isLittleEndian)
15470  Perm = BuildIntrinsicOp(IntrPerm,
15471  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15472  else
15473  Perm = BuildIntrinsicOp(IntrPerm,
15474  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15475 
15476  if (VT != PermTy)
15477  Perm = Subtarget.hasAltivec()
15478  ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15479  : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15480  DAG.getTargetConstant(1, dl, MVT::i64));
15481  // second argument is 1 because this rounding
15482  // is always exact.
15483 
15484  // The output of the permutation is our loaded result, the TokenFactor is
15485  // our new chain.
15486  DCI.CombineTo(N, Perm, TF);
15487  return SDValue(N, 0);
15488  }
15489  }
15490  break;
15491  case ISD::INTRINSIC_WO_CHAIN: {
15492  bool isLittleEndian = Subtarget.isLittleEndian();
15493  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15494  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15495  : Intrinsic::ppc_altivec_lvsl);
15496  if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15497  SDValue Add = N->getOperand(1);
15498 
15499  int Bits = 4 /* 16 byte alignment */;
15500 
15501  if (DAG.MaskedValueIsZero(Add->getOperand(1),
15502  APInt::getAllOnes(Bits /* alignment */)
15503  .zext(Add.getScalarValueSizeInBits()))) {
15504  SDNode *BasePtr = Add->getOperand(0).getNode();
15505  for (SDNode *U : BasePtr->uses()) {
15506  if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15507  cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15508  // We've found another LVSL/LVSR, and this address is an aligned
15509  // multiple of that one. The results will be the same, so use the
15510  // one we've just found instead.
15511 
15512  return SDValue(U, 0);
15513  }
15514  }
15515  }
15516 
15517  if (isa<ConstantSDNode>(Add->getOperand(1))) {
15518  SDNode *BasePtr = Add->getOperand(0).getNode();
15519  for (SDNode *U : BasePtr->uses()) {
15520  if (U->getOpcode() == ISD::ADD &&
15521  isa<ConstantSDNode>(U->getOperand(1)) &&
15522  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15523  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15524  (1ULL << Bits) ==
15525  0) {
15526  SDNode *OtherAdd = U;
15527  for (SDNode *V : OtherAdd->uses()) {
15528  if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15529  cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15530  IID) {
15531  return SDValue(V, 0);
15532  }
15533  }
15534  }
15535  }
15536  }
15537  }
15538 
15539  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15540  // Expose the vabsduw/h/b opportunity for down stream
15541  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15542  (IID == Intrinsic::ppc_altivec_vmaxsw ||
15543  IID == Intrinsic::ppc_altivec_vmaxsh ||
15544  IID == Intrinsic::ppc_altivec_vmaxsb)) {
15545  SDValue V1 = N->getOperand(1);
15546  SDValue V2 = N->getOperand(2);
15547  if ((V1.getSimpleValueType() == MVT::v4i32 ||
15548  V1.getSimpleValueType() == MVT::v8i16 ||
15549  V1.getSimpleValueType() == MVT::v16i8) &&
15550  V1.getSimpleValueType() == V2.getSimpleValueType()) {
15551  // (0-a, a)
15552  if (V1.getOpcode() == ISD::SUB &&
15554  V1.getOperand(1) == V2) {
15555  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15556  }
15557  // (a, 0-a)
15558  if (V2.getOpcode() == ISD::SUB &&
15559  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15560  V2.getOperand(1) == V1) {
15561  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15562  }
15563  // (x-y, y-x)
15564  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15565  V1.getOperand(0) == V2.getOperand(1) &&
15566  V1.getOperand(1) == V2.getOperand(0)) {
15567  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15568  }
15569  }
15570  }
15571  }
15572 
15573  break;
15575  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15576  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15577  if (Subtarget.needsSwapsForVSXMemOps()) {
15578  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15579  default:
15580  break;
15581  case Intrinsic::ppc_vsx_lxvw4x:
15582  case Intrinsic::ppc_vsx_lxvd2x:
15583  return expandVSXLoadForLE(N, DCI);
15584  }
15585  }
15586  break;
15587  case ISD::INTRINSIC_VOID:
15588  // For little endian, VSX stores require generating xxswapd/stxvd2x.
15589  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15590  if (Subtarget.needsSwapsForVSXMemOps()) {
15591  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15592  default:
15593  break;
15594  case Intrinsic::ppc_vsx_stxvw4x:
15595  case Intrinsic::ppc_vsx_stxvd2x:
15596  return expandVSXStoreForLE(N, DCI);
15597  }
15598  }
15599  break;
15600  case ISD::BSWAP: {
15601  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15602  // For subtargets without LDBRX, we can still do better than the default
15603  // expansion even for 64-bit BSWAP (LOAD).
15604  bool Is64BitBswapOn64BitTgt =
15605  Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15606  bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15607  N->getOperand(0).hasOneUse();
15608  if (IsSingleUseNormalLd &&
15609  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15610  (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15611  SDValue Load = N->getOperand(0);
15612  LoadSDNode *LD = cast<LoadSDNode>(Load);
15613  // Create the byte-swapping load.
15614  SDValue Ops[] = {
15615  LD->getChain(), // Chain
15616  LD->getBasePtr(), // Ptr
15617  DAG.getValueType(N->getValueType(0)) // VT
15618  };
15619  SDValue BSLoad =
15621  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15623  Ops, LD->getMemoryVT(), LD->getMemOperand());
15624 
15625  // If this is an i16 load, insert the truncate.
15626  SDValue ResVal = BSLoad;
15627  if (N->getValueType(0) == MVT::i16)
15628  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15629 
15630  // First, combine the bswap away. This makes the value produced by the
15631  // load dead.
15632  DCI.CombineTo(N, ResVal);
15633 
15634  // Next, combine the load away, we give it a bogus result value but a real
15635  // chain result. The result value is dead because the bswap is dead.
15636  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15637 
15638  // Return N so it doesn't get rechecked!
15639  return SDValue(N, 0);
15640  }
15641  // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15642  // before legalization so that the BUILD_PAIR is handled correctly.
15643  if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15644  !IsSingleUseNormalLd)
15645  return SDValue();
15646  LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15647 
15648  // Can't split volatile or atomic loads.
15649  if (!LD->isSimple())
15650  return SDValue();
15651  SDValue BasePtr = LD->getBasePtr();
15652  SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15653  LD->getPointerInfo(), LD->getAlign());
15654  Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15655  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15656  DAG.getIntPtrConstant(4, dl));
15658  LD->getMemOperand(), 4, 4);
15659  SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15660  Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15661  SDValue Res;
15662  if (Subtarget.isLittleEndian())
15663  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15664  else
15665  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15666  SDValue TF =
15668  Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15669  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15670  return Res;
15671  }
15672  case PPCISD::VCMP:
15673  // If a VCMP_rec node already exists with exactly the same operands as this
15674  // node, use its result instead of this node (VCMP_rec computes both a CR6
15675  // and a normal output).
15676  //
15677  if (!N->getOperand(0).hasOneUse() &&
15678  !N->getOperand(1).hasOneUse() &&
15679  !N->getOperand(2).hasOneUse()) {
15680 
15681  // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15682  SDNode *VCMPrecNode = nullptr;
15683 
15684  SDNode *LHSN = N->getOperand(0).getNode();
15685  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15686  UI != E; ++UI)
15687  if (UI->getOpcode() == PPCISD::VCMP_rec &&
15688  UI->getOperand(1) == N->getOperand(1) &&
15689  UI->getOperand(2) == N->getOperand(2) &&
15690  UI->getOperand(0) == N->getOperand(0)) {
15691  VCMPrecNode = *UI;
15692  break;
15693  }
15694 
15695  // If there is no VCMP_rec node, or if the flag value has a single use,
15696  // don't transform this.
15697  if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15698  break;
15699 
15700  // Look at the (necessarily single) use of the flag value. If it has a
15701  // chain, this transformation is more complex. Note that multiple things
15702  // could use the value result, which we should ignore.
15703  SDNode *FlagUser = nullptr;
15704  for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15705  FlagUser == nullptr; ++UI) {
15706  assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15707  SDNode *User = *UI;
15708  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15709  if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15710  FlagUser = User;
15711  break;
15712  }
15713  }
15714  }
15715 
15716  // If the user is a MFOCRF instruction, we know this is safe.
15717  // Otherwise we give up for right now.
15718  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15719  return SDValue(VCMPrecNode, 0);
15720  }
15721  break;
15722  case ISD::BRCOND: {
15723  SDValue Cond = N->getOperand(1);
15724  SDValue Target = N->getOperand(2);
15725 
15726  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15727  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15728  Intrinsic::loop_decrement) {
15729 
15730  // We now need to make the intrinsic dead (it cannot be instruction
15731  // selected).
15732  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15733  assert(Cond.getNode()->hasOneUse() &&
15734  "Counter decrement has more than one use");
15735 
15736  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15737  N->getOperand(0), Target);
15738  }
15739  }
15740  break;
15741  case ISD::BR_CC: {
15742  // If this is a branch on an altivec predicate comparison, lower this so
15743  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15744  // lowering is done pre-legalize, because the legalizer lowers the predicate
15745  // compare down to code that is difficult to reassemble.
15746  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15747  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15748 
15749  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15750  // value. If so, pass-through the AND to get to the intrinsic.
15751  if (LHS.getOpcode() == ISD::AND &&
15752  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15753  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15754  Intrinsic::loop_decrement &&
15755  isa<ConstantSDNode>(LHS.getOperand(1)) &&
15756  !isNullConstant(LHS.getOperand(1)))
15757  LHS = LHS.getOperand(0);
15758 
15759  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15760  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15761  Intrinsic::loop_decrement &&
15762  isa<ConstantSDNode>(RHS)) {
15763  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15764  "Counter decrement comparison is not EQ or NE");
15765 
15766  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15767  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15768  (CC == ISD::SETNE && !Val);
15769 
15770  // We now need to make the intrinsic dead (it cannot be instruction
15771  // selected).
15772  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15773  assert(LHS.getNode()->hasOneUse() &&
15774  "Counter decrement has more than one use");
15775 
15776  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15777  N->getOperand(0), N->getOperand(4));
15778  }
15779 
15780  int CompareOpc;
15781  bool isDot;
15782 
15783  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15784  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15785  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15786  assert(isDot && "Can't compare against a vector result!");
15787 
15788  // If this is a comparison against something other than 0/1, then we know
15789  // that the condition is never/always true.
15790  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15791  if (Val != 0 && Val != 1) {
15792  if (CC == ISD::SETEQ) // Cond never true, remove branch.
15793  return N->getOperand(0);
15794  // Always !=, turn it into an unconditional branch.
15795  return DAG.getNode(ISD::BR, dl, MVT::Other,
15796  N->getOperand(0), N->getOperand(4));
15797  }
15798 
15799  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15800 
15801  // Create the PPCISD altivec 'dot' comparison node.
15802  SDValue Ops[] = {
15803  LHS.getOperand(2), // LHS of compare
15804  LHS.getOperand(3), // RHS of compare
15805  DAG.getConstant(CompareOpc, dl, MVT::i32)
15806  };
15807  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15808  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15809 
15810  // Unpack the result based on how the target uses it.
15811  PPC::Predicate CompOpc;
15812  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15813  default: // Can't happen, don't crash on invalid number though.
15814  case 0: // Branch on the value of the EQ bit of CR6.
15815  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15816  break;
15817  case 1: // Branch on the inverted value of the EQ bit of CR6.
15818  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15819  break;
15820  case 2: // Branch on the value of the LT bit of CR6.
15821  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15822  break;
15823  case 3: // Branch on the inverted value of the LT bit of CR6.
15824  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15825  break;
15826  }
15827 
15828  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15829  DAG.getConstant(CompOpc, dl, MVT::i32),
15830  DAG.getRegister(PPC::CR6, MVT::i32),
15831  N->getOperand(4), CompNode.getValue(1));
15832  }
15833  break;
15834  }
15835  case ISD::BUILD_VECTOR:
15836  return DAGCombineBuildVector(N, DCI);
15837  case ISD::ABS:
15838  return combineABS(N, DCI);
15839  case ISD::VSELECT:
15840  return combineVSelect(N, DCI);
15841  }
15842 
15843  return SDValue();
15844 }
15845 
15846 SDValue
15848  SelectionDAG &DAG,
15849  SmallVectorImpl<SDNode *> &Created) const {
15850  // fold (sdiv X, pow2)
15851  EVT VT = N->getValueType(0);
15852  if (VT == MVT::i64 && !Subtarget.isPPC64())
15853  return SDValue();
15854  if ((VT != MVT::i32 && VT != MVT::i64) ||
15855  !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
15856  return SDValue();
15857 
15858  SDLoc DL(N);
15859  SDValue N0 = N->getOperand(0);
15860 
15861  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
15862  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15863  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15864 
15865  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15866  Created.push_back(Op.getNode());
15867 
15868  if (IsNegPow2) {
15869  Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15870  Created.push_back(Op.getNode());
15871  }
15872 
15873  return Op;
15874 }
15875 
15876 //===----------------------------------------------------------------------===//
15877 // Inline Assembly Support
15878 //===----------------------------------------------------------------------===//
15879 
15881  KnownBits &Known,
15882  const APInt &DemandedElts,
15883  const SelectionDAG &DAG,
15884  unsigned Depth) const {
15885  Known.resetAll();
15886  switch (Op.getOpcode()) {
15887  default: break;
15888  case PPCISD::LBRX: {
15889  // lhbrx is known to have the top bits cleared out.
15890  if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15891  Known.Zero = 0xFFFF0000;
15892  break;
15893  }
15894  case ISD::INTRINSIC_WO_CHAIN: {
15895  switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15896  default: break;
15897  case Intrinsic::ppc_altivec_vcmpbfp_p:
15898  case Intrinsic::ppc_altivec_vcmpeqfp_p:
15899  case Intrinsic::ppc_altivec_vcmpequb_p:
15900  case Intrinsic::ppc_altivec_vcmpequh_p:
15901  case Intrinsic::ppc_altivec_vcmpequw_p:
15902  case Intrinsic::ppc_altivec_vcmpequd_p:
15903  case Intrinsic::ppc_altivec_vcmpequq_p:
15904  case Intrinsic::ppc_altivec_vcmpgefp_p:
15905  case Intrinsic::ppc_altivec_vcmpgtfp_p:
15906  case Intrinsic::ppc_altivec_vcmpgtsb_p:
15907  case Intrinsic::ppc_altivec_vcmpgtsh_p:
15908  case Intrinsic::ppc_altivec_vcmpgtsw_p:
15909  case Intrinsic::ppc_altivec_vcmpgtsd_p:
15910  case Intrinsic::ppc_altivec_vcmpgtsq_p:
15911  case Intrinsic::ppc_altivec_vcmpgtub_p:
15912  case Intrinsic::ppc_altivec_vcmpgtuh_p:
15913  case Intrinsic::ppc_altivec_vcmpgtuw_p:
15914  case Intrinsic::ppc_altivec_vcmpgtud_p:
15915  case Intrinsic::ppc_altivec_vcmpgtuq_p:
15916  Known.Zero = ~1U; // All bits but the low one are known to be zero.
15917  break;
15918  }
15919  break;
15920  }
15921  case ISD::INTRINSIC_W_CHAIN: {
15922  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
15923  default:
15924  break;
15925  case Intrinsic::ppc_load2r:
15926  // Top bits are cleared for load2r (which is the same as lhbrx).
15927  Known.Zero = 0xFFFF0000;
15928  break;
15929  }
15930  break;
15931  }
15932  }
15933 }
15934 
15936  switch (Subtarget.getCPUDirective()) {
15937  default: break;
15938  case PPC::DIR_970:
15939  case PPC::DIR_PWR4:
15940  case PPC::DIR_PWR5:
15941  case PPC::DIR_PWR5X:
15942  case PPC::DIR_PWR6:
15943  case PPC::DIR_PWR6X:
15944  case PPC::DIR_PWR7:
15945  case PPC::DIR_PWR8:
15946  case PPC::DIR_PWR9:
15947  case PPC::DIR_PWR10:
15948  case PPC::DIR_PWR_FUTURE: {
15949  if (!ML)
15950  break;
15951 
15953  // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
15954  // so that we can decrease cache misses and branch-prediction misses.
15955  // Actual alignment of the loop will depend on the hotness check and other
15956  // logic in alignBlocks.
15957  if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15958  return Align(32);
15959  }
15960 
15961  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15962 
15963  // For small loops (between 5 and 8 instructions), align to a 32-byte
15964  // boundary so that the entire loop fits in one instruction-cache line.
15965  uint64_t LoopSize = 0;
15966  for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15967  for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15968  LoopSize += TII->getInstSizeInBytes(*J);
15969  if (LoopSize > 32)
15970  break;
15971  }
15972 
15973  if (LoopSize > 16 && LoopSize <= 32)
15974  return Align(32);
15975 
15976  break;
15977  }
15978  }
15979 
15981 }
15982 
15983 /// getConstraintType - Given a constraint, return the type of
15984 /// constraint it is for this target.
15987  if (Constraint.size() == 1) {
15988  switch (Constraint[0]) {
15989  default: break;
15990  case 'b':
15991  case 'r':
15992  case 'f':
15993  case 'd':
15994  case 'v':
15995  case 'y':
15996  return C_RegisterClass;
15997  case 'Z':
15998  // FIXME: While Z does indicate a memory constraint, it specifically
15999  // indicates an r+r address (used in conjunction with the 'y' modifier
16000  // in the replacement string). Currently, we're forcing the base
16001  // register to be r0 in the asm printer (which is interpreted as zero)
16002  // and forming the complete address in the second register. This is
16003  // suboptimal.
16004  return C_Memory;
16005  }
16006  } else if (Constraint == "wc") { // individual CR bits.
16007  return C_RegisterClass;
16008  } else if (Constraint == "wa" || Constraint == "wd" ||
16009  Constraint == "wf" || Constraint == "ws" ||
16010  Constraint == "wi" || Constraint == "ww") {
16011  return C_RegisterClass; // VSX registers.
16012  }
16013  return TargetLowering::getConstraintType(Constraint);
16014 }
16015 
16016 /// Examine constraint type and operand type and determine a weight value.
16017 /// This object must already have been set up with the operand type
16018 /// and the current alternative constraint selected.
16021  AsmOperandInfo &info, const char *constraint) const {
16022  ConstraintWeight weight = CW_Invalid;
16023  Value *CallOperandVal = info.CallOperandVal;
16024  // If we don't have a value, we can't do a match,
16025  // but allow it at the lowest weight.
16026  if (!CallOperandVal)
16027  return CW_Default;
16028  Type *type = CallOperandVal->getType();
16029 
16030  // Look at the constraint type.
16031  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16032  return CW_Register; // an individual CR bit.
16033  else if ((StringRef(constraint) == "wa" ||
16034  StringRef(constraint) == "wd" ||
16035  StringRef(constraint) == "wf") &&
16036  type->isVectorTy())
16037  return CW_Register;
16038  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16039  return CW_Register; // just hold 64-bit integers data.
16040  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16041  return CW_Register;
16042  else if (StringRef(constraint) == "ww" && type->isFloatTy())
16043  return CW_Register;
16044 
16045  switch (*constraint) {
16046  default:
16048  break;
16049  case 'b':
16050  if (type->isIntegerTy())
16051  weight = CW_Register;
16052  break;
16053  case 'f':
16054  if (type->isFloatTy())
16055  weight = CW_Register;
16056  break;
16057  case 'd':
16058  if (type->isDoubleTy())
16059  weight = CW_Register;
16060  break;
16061  case 'v':
16062  if (type->isVectorTy())
16063  weight = CW_Register;
16064  break;
16065  case 'y':
16066  weight = CW_Register;
16067  break;
16068  case 'Z':
16069  weight = CW_Memory;
16070  break;
16071  }
16072  return weight;
16073 }
16074 
16075 std::pair<unsigned, const TargetRegisterClass *>
16077  StringRef Constraint,
16078  MVT VT) const {
16079  if (Constraint.size() == 1) {
16080  // GCC RS6000 Constraint Letters
16081  switch (Constraint[0]) {
16082  case 'b': // R1-R31
16083  if (VT == MVT::i64 && Subtarget.isPPC64())
16084  return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16085  return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16086  case 'r': // R0-R31
16087  if (VT == MVT::i64 && Subtarget.isPPC64())
16088  return std::make_pair(0U, &PPC::G8RCRegClass);
16089  return std::make_pair(0U, &PPC::GPRCRegClass);
16090  // 'd' and 'f' constraints are both defined to be "the floating point
16091  // registers", where one is for 32-bit and the other for 64-bit. We don't
16092  // really care overly much here so just give them all the same reg classes.
16093  case 'd':
16094  case 'f':
16095  if (Subtarget.hasSPE()) {
16096  if (VT == MVT::f32 || VT == MVT::i32)
16097  return std::make_pair(0U, &PPC::GPRCRegClass);
16098  if (VT == MVT::f64 || VT == MVT::i64)
16099  return std::make_pair(0U, &PPC::SPERCRegClass);
16100  } else {
16101  if (VT == MVT::f32 || VT == MVT::i32)
16102  return std::make_pair(0U, &PPC::F4RCRegClass);
16103  if (VT == MVT::f64 || VT == MVT::i64)
16104  return std::make_pair(0U, &PPC::F8RCRegClass);
16105  }
16106  break;
16107  case 'v':
16108  if (Subtarget.hasAltivec() && VT.isVector())
16109  return std::make_pair(0U, &PPC::VRRCRegClass);
16110  else if (Subtarget.hasVSX())
16111  // Scalars in Altivec registers only make sense with VSX.
16112  return std::make_pair(0U, &PPC::VFRCRegClass);
16113  break;
16114  case 'y': // crrc
16115  return std::make_pair(0U, &PPC::CRRCRegClass);
16116  }
16117  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16118  // An individual CR bit.
16119  return std::make_pair(0U, &PPC::CRBITRCRegClass);
16120  } else if ((Constraint == "wa" || Constraint == "wd" ||
16121  Constraint == "wf" || Constraint == "wi") &&
16122  Subtarget.hasVSX()) {
16123  // A VSX register for either a scalar (FP) or vector. There is no
16124  // support for single precision scalars on subtargets prior to Power8.
16125  if (VT.isVector())
16126  return std::make_pair(0U, &PPC::VSRCRegClass);
16127  if (VT == MVT::f32 && Subtarget.hasP8Vector())
16128  return std::make_pair(0U, &PPC::VSSRCRegClass);
16129  return std::make_pair(0U, &PPC::VSFRCRegClass);
16130  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16131  if (VT == MVT::f32 && Subtarget.hasP8Vector())
16132  return std::make_pair(0U, &PPC::VSSRCRegClass);
16133  else
16134  return std::make_pair(0U, &PPC::VSFRCRegClass);
16135  } else if (Constraint == "lr") {
16136  if (VT == MVT::i64)
16137  return std::make_pair(0U, &PPC::LR8RCRegClass);
16138  else
16139  return std::make_pair(0U, &PPC::LRRCRegClass);
16140  }
16141 
16142  // Handle special cases of physical registers that are not properly handled
16143  // by the base class.
16144  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16145  // If we name a VSX register, we can't defer to the base class because it
16146  // will not recognize the correct register (their names will be VSL{0-31}
16147  // and V{0-31} so they won't match). So we match them here.
16148  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16149  int VSNum = atoi(Constraint.data() + 3);
16150  assert(VSNum >= 0 && VSNum <= 63 &&
16151  "Attempted to access a vsr out of range");
16152  if (VSNum < 32)
16153  return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16154  return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16155  }
16156 
16157  // For float registers, we can't defer to the base class as it will match
16158  // the SPILLTOVSRRC class.
16159  if (Constraint.size() > 3 && Constraint[1] == 'f') {
16160  int RegNum = atoi(Constraint.data() + 2);
16161  if (RegNum > 31 || RegNum < 0)
16162  report_fatal_error("Invalid floating point register number");
16163  if (VT == MVT::f32 || VT == MVT::i32)
16164  return Subtarget.hasSPE()
16165  ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16166  : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16167  if (VT == MVT::f64 || VT == MVT::i64)
16168  return Subtarget.hasSPE()
16169  ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16170  : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16171  }
16172  }
16173 
16174  std::pair<unsigned, const TargetRegisterClass *> R =
16176 
16177  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16178  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16179  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16180  // register.
16181  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16182  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16183  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16184  PPC::GPRCRegClass.contains(R.first))
16185  return std::make_pair(TRI->getMatchingSuperReg(R.first,
16186  PPC::sub_32, &PPC::G8RCRegClass),
16187  &PPC::G8RCRegClass);
16188 
16189  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16190  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16191  R.first = PPC::CR0;
16192  R.second = &PPC::CRRCRegClass;
16193  }
16194  // FIXME: This warning should ideally be emitted in the front end.
16195  const auto &TM = getTargetMachine();
16196  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16197  if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16198  (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16199  (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16200  errs() << "warning: vector registers 20 to 32 are reserved in the "
16201  "default AIX AltiVec ABI and cannot be used\n";
16202  }
16203 
16204  return R;
16205 }
16206 
16207 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16208 /// vector. If it is invalid, don't add anything to Ops.
16210  std::string &Constraint,
16211  std::vector<SDValue>&Ops,
16212  SelectionDAG &DAG) const {
16213  SDValue Result;
16214 
16215  // Only support length 1 constraints.
16216  if (Constraint.length() > 1) return;
16217 
16218  char Letter = Constraint[0];
16219  switch (Letter) {
16220  default: break;
16221  case 'I':
16222  case 'J':
16223  case 'K':
16224  case 'L':
16225  case 'M':
16226  case 'N':
16227  case 'O':
16228  case 'P': {
16229  ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16230  if (!CST) return; // Must be an immediate to match.
16231  SDLoc dl(Op);
16232  int64_t Value = CST->getSExtValue();
16233  EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16234  // numbers are printed as such.
16235  switch (Letter) {
16236  default: llvm_unreachable("Unknown constraint letter!");
16237  case 'I': // "I" is a signed 16-bit constant.
16238  if (isInt<16>(Value))
16239  Result = DAG.getTargetConstant(Value, dl, TCVT);
16240  break;
16241  case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16242  if (isShiftedUInt<16, 16>(Value))
16243  Result = DAG.getTargetConstant(Value, dl, TCVT);
16244  break;
16245  case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16246  if (isShiftedInt<16, 16>(Value))
16247  Result = DAG.getTargetConstant(Value, dl, TCVT);
16248  break;
16249  case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16250  if (isUInt<16>(Value))
16251  Result = DAG.getTargetConstant(Value, dl, TCVT);
16252  break;
16253  case 'M': // "M" is a constant that is greater than 31.
16254  if (Value > 31)
16255  Result = DAG.getTargetConstant(Value, dl, TCVT);
16256  break;
16257  case 'N': // "N" is a positive constant that is an exact power of two.
16258  if (Value > 0 && isPowerOf2_64(Value))
16259  Result = DAG.getTargetConstant(Value, dl, TCVT);
16260  break;
16261  case 'O': // "O" is the constant zero.
16262  if (Value == 0)
16263  Result = DAG.getTargetConstant(Value, dl, TCVT);
16264  break;
16265  case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16266  if (isInt<16>(-Value))
16267  Result = DAG.getTargetConstant(Value, dl, TCVT);
16268  break;
16269  }
16270  break;
16271  }
16272  }
16273 
16274  if (Result.getNode()) {
16275  Ops.push_back(Result);
16276  return;
16277  }
16278 
16279  // Handle standard constraint letters.
16280  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16281 }
16282 
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
// NOTE(review): the first signature line was dropped by the extraction tool
// (presumably "bool PPCTargetLowering::isLegalAddressingMode(const DataLayout
// &DL," per upstream) — confirm against the LLVM source.
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // Vector type r+i form is supported since power9 as DQ form. We don't check
  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
  // imm form is preferred and the offset can be adjusted to use imm form later
  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
  // max offset to check legal addressing mode, we should be a little aggressive
  // to contain other offsets for that LSRUse.
  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only support r+r,
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
16327 
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Lower ISD::RETURNADDR. Depth 0 reads the current function's saved LR;
  // Depth > 0 walks to the caller's frame and loads LR from its save slot.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  // NOTE(review): the condition guarding this early return was dropped in
  // extraction (presumably "if (verifyReturnAddressArgumentIsConstant(Op,
  // DAG))") — confirm against upstream.
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                    LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}
16367 
16368 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16369  SelectionDAG &DAG) const {
16370  SDLoc dl(Op);
16371  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16372 
16373  MachineFunction &MF = DAG.getMachineFunction();
16374  MachineFrameInfo &MFI = MF.getFrameInfo();
16375  MFI.setFrameAddressIsTaken(true);
16376 
16377  EVT PtrVT = getPointerTy(MF.getDataLayout());
16378  bool isPPC64 = PtrVT == MVT::i64;
16379 
16380  // Naked functions never have a frame pointer, and so we use r1. For all
16381  // other functions, this decision must be delayed until during PEI.
16382  unsigned FrameReg;
16383  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16384  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16385  else
16386  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16387 
16388  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16389  PtrVT);
16390  while (Depth--)
16391  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16392  FrameAddr, MachinePointerInfo());
16393  return FrameAddr;
16394 }
16395 
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
// Resolve a named register (e.g. from a named-register global) to a physical
// register for the requested value width.
// NOTE(review): the signature line was dropped in extraction (presumably
// "Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT
// VT,") — confirm against upstream.
                                      const MachineFunction &MF) const {
  bool isPPC64 = Subtarget.isPPC64();

  // Only s32 is accepted, plus s64 when targeting PPC64.
  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
  if (!is64Bit && VT != LLT::scalar(32))
    report_fatal_error("Invalid register global variable type");

  // NOTE(review): the StringSwitch head line was dropped in extraction
  // (presumably "Register Reg = StringSwitch<Register>(RegName)").
      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
      .Case("r2", isPPC64 ? Register() : PPC::R2)
      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
      .Default(Register());

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}
16416 
// Return true when the given address node (JumpTable, BlockAddress or
// GlobalAddress) must be reached indirectly through the GOT/TOC.
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {").
  // 32-bit SVR4 ABI access everything as got-indirect.
  if (Subtarget.is32BitELFABI())
    return true;

  // AIX accesses everything indirectly through the TOC, which is similar to
  // the GOT.
  if (Subtarget.isAIXABI())
    return true;

  // NOTE(review): the code-model query line was dropped in extraction
  // (presumably "CodeModel::Model CModel = getTargetMachine().getCodeModel();").
  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
    return Subtarget.isGVIndirectSymbol(G->getGlobal());

  return false;
}
16442 
bool
// NOTE(review): the signature line was dropped in extraction (presumably
// "PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
// const {") — confirm against upstream.
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
16448 
// Describe the memory behaviour of PPC memory intrinsics so SelectionDAG can
// attach the proper MachineMemOperand.
// NOTE(review): the extract dropped the signature line (presumably "bool
// PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,") and several
// "Info.opc = ..." / "Info.flags = ..." assignment lines inside the cases —
// each gap is flagged below; confirm against upstream.
                                       const CallInst &I,
                                       MachineFunction &MF,
                                       unsigned Intrinsic) const {
  switch (Intrinsic) {
  // Quadword (i128) atomic operations touch 16 aligned bytes at arg 0.
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    // NOTE(review): "Info.opc = ..." line dropped here in extraction.
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    // NOTE(review): "Info.flags = ..." line(s) dropped here in extraction.
    return true;
  case Intrinsic::ppc_atomic_load_i128:
    // NOTE(review): "Info.opc = ..." line dropped here in extraction.
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    // NOTE(review): "Info.flags = ..." line dropped here in extraction.
    return true;
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    // The pointer operand of the store intrinsic is the third argument.
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(16);
    // NOTE(review): "Info.flags = ..." line dropped here in extraction.
    return true;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    // Pick the element/vector type the load actually reads.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    // NOTE(review): "Info.opc = ..." line dropped here in extraction.
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // A symmetric window around the pointer is reported, presumably because
    // these loads ignore the low address bits — confirm against upstream.
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    // NOTE(review): "Info.flags = ..." line dropped here in extraction.
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    // Pick the element/vector type the store actually writes.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    // For the stores, the pointer is the second argument.
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    // NOTE(review): "Info.flags = ..." line dropped here in extraction.
    return true;
  }
  default:
    break;
  }

  return false;
}
16572 
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
/// Chooses the widest efficient type for lowering memcpy/memset-style ops.
// NOTE(review): the signature line was dropped in extraction (presumably
// "EVT PPCTargetLowering::getOptimalMemOpType(") — confirm against upstream.
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Op.size() >= 16 &&
        (Op.isAligned(Align(16)) ||
         ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  // Otherwise fall back to the widest GPR-sized integer type.
  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}
16592 
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt
// &Imm,") — confirm against upstream.
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  // Integers of 1..64 bits can be materialized directly; wider values keep
  // the constant-pool load.
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}
16602 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {").
// Truncating i64 to i32 is free (subregister use); everything else is not.
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
16610 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {").
// EVT overload of the same rule: only i64 -> i32 truncation is free.
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
16618 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {").
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    // Narrow non-extending or zero-extending loads already produce
    // zero-extended values, so the explicit zext costs nothing.
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}
16638 
16639 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16640  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16641  "invalid fpext types");
16642  // Extending to float128 is not free.
16643  if (DestVT == MVT::f128)
16644  return false;
16645  return true;
16646 }
16647 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {").
// A compare immediate is legal iff it fits the signed (cmpwi/cmpdi) or
// unsigned (cmplwi/cmpldi) 16-bit immediate field.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
16651 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {").
// An add immediate is legal iff it fits a 16-bit signed or unsigned field.
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
16655 
// NOTE(review): the first signature lines were dropped in extraction
// (presumably "bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT
// VT," plus address-space/alignment/flags parameters) — confirm upstream.
                                               bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  // Unaligned scalar FP access needs subtarget support.
  if (VT.isFloatingPoint() && !VT.isVector() &&
      !Subtarget.allowsUnalignedFPAccess())
    return false;

  // Vectors: only the four VSX-supported 128-bit types, and only with VSX.
  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}
16693 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT
// VT,") — confirm against upstream.
                                               SDValue C) const {
  // Check integral scalar types.
  if (!VT.isScalarInteger())
    return false;
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
      return false;
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL. So exclude them.
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
    // HW instruction, ie. MULLI
    // 2. If the multiplier after shifted fits 16 bits, an extra shift
    // instruction is needed than case 1, ie. MULLI and RLDICR
    int64_t Imm = ConstNode->getSExtValue();
    unsigned Shift = countTrailingZeros<uint64_t>(Imm);
    Imm >>= Shift;
    if (isInt<16>(Imm))
      return false;
    uint64_t UImm = static_cast<uint64_t>(Imm);
    // Decompose only when the multiplier is +/-(2^N +/- 1): those become a
    // shift plus one add/sub.
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
      return true;
  }
  return false;
}
16720 
// NOTE(review): this forwarding overload lost lines in extraction —
// presumably "bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const
// MachineFunction &MF," before, and "return isFMAFasterThanFMulAndFAdd("
// between the lines below — confirm against upstream.
                                               EVT VT) const {
      MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
}
16726 
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,").
                                                   Type *Ty) const {
  // FMA is always profitable for f32/f64; for fp128 only when the Power9
  // scalar quad-precision instructions are available.
  switch (Ty->getScalarType()->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
    return true;
  case Type::FP128TyID:
    return Subtarget.hasP9Vector();
  default:
    return false;
  }
}
16739 
// FIXME: add more patterns which are not profitable to hoist.
// NOTE(review): the signature line was dropped in extraction (presumably
// "bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {").
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    // NOTE(review): a declaration line was dropped here in extraction
    // (presumably "const TargetOptions &Options = getTargetMachine().Options;").
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getParent()->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();

    // Not profitable to hoist the fmul out of a contractible fmul+fadd/fsub.
    return !(
        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
        // NOTE(review): a legality-check operand was dropped here in
        // extraction (presumably "isOperationLegalOrCustom(ISD::FMA,
        // getValueType(DL, Ty)) &&").
        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
  }
  case Instruction::Load: {
    // Don't break "store (load float*)" pattern, this pattern will be combined
    // to "store (load int32)" in later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
    // cycles than loading a 32 bit integer.
    LoadInst *LI = cast<LoadInst>(I);
    // For the loads that combineLoadToOperationType does nothing, like
    // ordered load, it should be profitable to hoist them.
    // For swifterror load, it can only be used for pointer to pointer type, so
    // later type check should get rid of this case.
    if (!LI->isUnordered())
      return true;

    if (User->getOpcode() != Instruction::Store)
      return true;

    if (I->getType()->getTypeID() != Type::FloatTyID)
      return true;

    return false;
  }
  default:
    return true;
  }
  return true;
}
16791 
const MCPhysReg *
// NOTE(review): the signature line was dropped in extraction (presumably
// "PPCTargetLowering::getScratchRegisters(const CallInst *CS) const {").
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
16804 
// NOTE(review): the signature line was dropped in extraction (presumably
// "Register PPCTargetLowering::getExceptionPointerRegister(").
// EH: the exception object pointer lives in r3 (x3 on PPC64).
                                  const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}
16809 
// NOTE(review): the signature line was dropped in extraction (presumably
// "Register PPCTargetLowering::getExceptionSelectorRegister(").
// EH: the exception selector value lives in r4 (x4 on PPC64).
                                  const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
16814 
bool
// NOTE(review): the signature line was dropped in extraction (presumably
// "PPCTargetLowering::shouldExpandBuildVectorWithShuffles(").
                   EVT VT , unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  // With VSX, shuffles are generally preferable to stack expansion.
  if (Subtarget.hasVSX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
16826 
// NOTE(review): lines were dropped in extraction — presumably
// "Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N)
// const {" before, and "return TargetLowering::getSchedulingPreference(N);"
// as the body of the if below — confirm against upstream.
  if (DisableILPPref || Subtarget.enableMachineScheduler())

  return Sched::ILP;
}
16833 
// Create a fast isel object.
FastISel *
// NOTE(review): the signature line was dropped in extraction (presumably
// "PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,").
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
16840 
16841 // 'Inverted' means the FMA opcode after negating one multiplicand.
16842 // For example, (fma -a b c) = (fnmsub a b c)
16843 static unsigned invertFMAOpcode(unsigned Opc) {
16844  switch (Opc) {
16845  default:
16846  llvm_unreachable("Invalid FMA opcode for PowerPC!");
16847  case ISD::FMA:
16848  return PPCISD::FNMSUB;
16849  case PPCISD::FNMSUB:
16850  return ISD::FMA;
16851  }
16852 }
16853 
// NOTE(review): several lines were dropped in extraction — presumably the
// signature "SDValue PPCTargetLowering::getNegatedExpression(SDValue Op,
// SelectionDAG &DAG,", a recursion-depth guard before the first return,
// "const TargetOptions &Options = getTargetMachine().Options;" inside the
// FNMSUB case, and "NegatibleCost NxCost = NegatibleCost::Expensive;"
// declarations before each recursive call — confirm against upstream.
                                              bool LegalOps, bool OptForSize,
                                              NegatibleCost &Cost,
                                              unsigned Depth) const {
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);

    SDValue NegN2 =
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);

    // Negating the addend is required by every form below.
    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                           N0Cost, Depth + 1);

      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                           N1Cost, Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
    }

    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
16918 
// Override to enable LOAD_STACK_GUARD lowering on Linux.
// NOTE(review): lines were dropped in extraction — presumably "bool
// PPCTargetLowering::useLoadStackGuardNode() const {" before, and "return
// TargetLowering::useLoadStackGuardNode();" as the body of the if below.
  if (!Subtarget.isTargetLinux())
  return true;
}
16925 
// Override to disable global variable loading on Linux and insert AIX canary
// word declaration.
// NOTE(review): lines were dropped in extraction — presumably "void
// PPCTargetLowering::insertSSPDeclarations(Module &M) const {" before, and
// "TargetLowering::insertSSPDeclarations(M);" as the body of the last if.
  if (Subtarget.isAIXABI()) {
    // On AIX the stack-protector canary word is a named global; ensure the
    // module declares it.
    M.getOrInsertGlobal(AIXSSPCanaryWordName,
                        Type::getInt8PtrTy(M.getContext()));
    return;
  }
  if (!Subtarget.isTargetLinux())
}
16937 
// NOTE(review): lines were dropped in extraction — presumably "Value
// *PPCTargetLowering::getSDagStackGuard(const Module &M) const {" before,
// and "return TargetLowering::getSDagStackGuard(M);" after the AIX case.
  if (Subtarget.isAIXABI())
    return M.getGlobalVariable(AIXSSPCanaryWordName);
}
16943 
// NOTE(review): lines were dropped in extraction — presumably the signature
// "bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,", and a
// fallthrough annotation between the f64 and ppcf128 cases below — confirm
// against upstream.
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch(VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
    if (Subtarget.hasPrefixInstrs()) {
      // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
      return true;
    }
  case MVT::ppcf128:
    return Imm.isPosZero();
  }
}
16965 
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
// NOTE(review): the signature line was dropped in extraction (presumably
// "static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,").
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  // Map the generic shift to the PPC node with modulo shift-amount semantics.
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    // NOTE(review): the splat-constant extraction line was dropped here in
    // extraction (presumably "if (ConstantSDNode *Mask =
    // isConstOrConstSplat(N1->getOperand(1)))").
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}
16999 
17000 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17001  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17002  return Value;
17003 
17004  SDValue N0 = N->getOperand(0);
17005  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17006  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17007  N0.getOpcode() != ISD::SIGN_EXTEND ||
17008  N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17009  N->getValueType(0) != MVT::i64)
17010  return SDValue();
17011 
17012  // We can't save an operation here if the value is already extended, and
17013  // the existing shift is easier to combine.
17014  SDValue ExtsSrc = N0.getOperand(0);
17015  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17016  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17017  return SDValue();
17018 
17019  SDLoc DL(N0);
17020  SDValue ShiftBy = SDValue(CN1, 0);
17021  // We want the shift amount to be i32 on the extswli, but the shift could
17022  // have an i64.
17023  if (ShiftBy.getValueType() == MVT::i64)
17024  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17025 
17026  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17027  ShiftBy);
17028 }
17029 
17030 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17031  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17032  return Value;
17033 
17034  return SDValue();
17035 }
17036 
17037 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17038  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17039  return Value;
17040 
17041  return SDValue();
17042 }
17043 
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
// NOTE(review): the signature line was dropped in extraction (presumably
// "static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,").
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Recognise a single-use zext of a single-use i64 setcc-against-constant
  // whose negated constant fits the addi immediate field.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}
17128 
// Transform
// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34 bit signed integer.
// NOTE(review): the signature line was dropped in extraction (presumably
// "static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,").
                                          const PPCSubtarget &Subtarget) {
  if (!Subtarget.isUsingPCRelativeCalls())
    return SDValue();

  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
  // If we find that node try to cast the Global Address and the Constant.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
    std::swap(LHS, RHS);

  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
    return SDValue();

  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);

  // Check that both casts succeeded.
  if (!GSDN || !ConstNode)
    return SDValue();

  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
  SDLoc DL(GSDN);

  // The signed int offset needs to fit in 34 bits.
  if (!isInt<34>(NewOffset))
    return SDValue();

  // The new global address is a copy of the old global address except
  // that it has the updated Offset.
  SDValue GA =
      DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
                                 NewOffset, GSDN->getTargetFlags());
  SDValue MatPCRel =
      DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
  return MatPCRel;
}
17174 
17175 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17176  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17177  return Value;
17178 
17179  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17180  return Value;
17181 
17182  return SDValue();
17183 }
17184 
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situtation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
    EVT VT = N->getValueType(0);
    if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
      return SDValue();
    SDValue Sub = Op0.getOperand(0);
    if (Sub.getOpcode() == ISD::SUB) {
      SDValue SubOp0 = Sub.getOperand(0);
      SDValue SubOp1 = Sub.getOperand(1);
      if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
          (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
        return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
                               SubOp1.getOperand(0),
                               DCI.DAG.getTargetConstant(0, dl, MVT::i32));
      }
    }
  }

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  // Pick the doubleword of the v2i64 view that holds the low 64 bits.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        // NOTE(review): an argument line was dropped here in extraction
        // (presumably "PPCISD::EXTRACT_VSX_ELT, dl, MVT::i64, Bitcast,").
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}
17254 
17255 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17256  SelectionDAG &DAG = DCI.DAG;
17257 
17258  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17259  if (!ConstOpOrElement)
17260  return SDValue();
17261 
17262  // An imul is usually smaller than the alternative sequence for legal type.
17263  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17264  isOperationLegal(ISD::MUL, N->getValueType(0)))
17265  return SDValue();
17266 
17267  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17268  switch (this->Subtarget.getCPUDirective()) {
17269  default:
17270  // TODO: enhance the condition for subtarget before pwr8
17271  return false;
17272  case PPC::DIR_PWR8:
17273  // type mul add shl
17274  // scalar 4 1 1
17275  // vector 7 2 2
17276  return true;
17277  case PPC::DIR_PWR9:
17278  case PPC::DIR_PWR10:
17279  case PPC::DIR_PWR_FUTURE:
17280  // type mul add shl
17281  // scalar 5 2 2
17282  // vector 7 2 2
17283 
17284  // The cycle RATIO of related operations are showed as a table above.
17285  // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
17286  // scalar and vector type. For 2 instrs patterns, add/sub + shl
17287  // are 4, it is always profitable; but for 3 instrs patterns
17288  // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
17289  // So we should only do it for vector type.
17290  return IsAddOne && IsNeg ? VT.isVector() : true;
17291  }
17292  };
17293 
17294  EVT VT = N->getValueType(0);
17295  SDLoc DL(N);
17296 
17297  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17298  bool IsNeg = MulAmt.isNegative();
17299  APInt MulAmtAbs = MulAmt.abs();
17300 
17301  if ((MulAmtAbs - 1).isPowerOf2()) {
17302  // (mul x, 2^N + 1) => (add (shl x, N), x)
17303  // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17304 
17305  if (!IsProfitable(IsNeg, true, VT))
17306  return SDValue();
17307 
17308  SDValue Op0 = N->getOperand(0);
17309  SDValue Op1 =
17310  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17311  DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17312  SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17313 
17314  if (!IsNeg)
17315  return Res;
17316 
17317  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17318  } else if ((MulAmtAbs + 1).isPowerOf2()) {
17319  // (mul x, 2^N - 1) => (sub (shl x, N), x)
17320  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17321 
17322  if (!IsProfitable(IsNeg, false, VT))
17323  return SDValue();
17324 
17325  SDValue Op0 = N->getOperand(0);
17326  SDValue Op1 =
17327  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17328  DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17329 
17330  if (!IsNeg)
17331  return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17332  else
17333  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17334 
17335  } else {
17336  return SDValue();
17337  }
17338 }
17339 
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  // NOTE(review): a declaration of `Options` (used below, presumably the
  // target options) appears to be missing from this copy — verify against
  // upstream.
  unsigned Opc = N->getOpcode();
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  // Only fold when the target has a legal FMA for this type.
  if (!isOperationLegal(ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  // Absorb a cheaply-negatable first operand by flipping the opcode.
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  // Same for the second multiplicand.
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

  return SDValue();
}
17376 
17377 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17378  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
17379  if (!Subtarget.is64BitELFABI())
17380  return false;
17381 
17382  // If not a tail call then no need to proceed.
17383  if (!CI->isTailCall())
17384  return false;
17385 
17386  // If sibling calls have been disabled and tail-calls aren't guaranteed
17387  // there is no reason to duplicate.
17388  auto &TM = getTargetMachine();
17389  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17390  return false;
17391 
17392  // Can't tail call a function called indirectly, or if it has variadic args.
17393  const Function *Callee = CI->getCalledFunction();
17394  if (!Callee || Callee->isVarArg())
17395  return false;
17396 
17397  // Make sure the callee and caller calling conventions are eligible for tco.
17398  const Function *Caller = CI->getParent()->getParent();
17399  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17400  CI->getCallingConv()))
17401  return false;
17402 
17403  // If the function is local then we have a good chance at tail-calling it
17404  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17405 }
17406 
17407 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17408  if (!Subtarget.hasVSX())
17409  return false;
17410  if (Subtarget.hasP9Vector() && VT == MVT::f128)
17411  return true;
17412  return VT == MVT::f32 || VT == MVT::f64 ||
17413  VT == MVT::v4f32 || VT == MVT::v2f64;
17414 }
17415 
17416 bool PPCTargetLowering::
17417 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17418  const Value *Mask = AndI.getOperand(1);
17419  // If the mask is suitable for andi. or andis. we should sink the and.
17420  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17421  // Can't handle constants wider than 64-bits.
17422  if (CI->getBitWidth() > 64)
17423  return false;
17424  int64_t ConstVal = CI->getZExtValue();
17425  return isUInt<16>(ConstVal) ||
17426  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17427  }
17428 
17429  // For non-constant masks, we can always use the record-form and.
17430  return true;
17431 }
17432 
17433 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
17434 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
17435 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
17436 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
17437 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
17438 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
17439  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
17440  assert(Subtarget.hasP9Altivec() &&
17441  "Only combine this when P9 altivec supported!");
17442  EVT VT = N->getValueType(0);
17443  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17444  return SDValue();
17445 
17446  SelectionDAG &DAG = DCI.DAG;
17447  SDLoc dl(N);
17448  if (N->getOperand(0).getOpcode() == ISD::SUB) {
17449  // Even for signed integers, if it's known to be positive (as signed
17450  // integer) due to zero-extended inputs.
17451  unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
17452  unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
17453  if ((SubOpcd0 == ISD::ZERO_EXTEND ||
17454  SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
17455  (SubOpcd1 == ISD::ZERO_EXTEND ||
17456  SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
17457  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17458  N->getOperand(0)->getOperand(0),
17459  N->getOperand(0)->getOperand(1),
17460  DAG.getTargetConstant(0, dl, MVT::i32));
17461  }
17462 
17463  // For type v4i32, it can be optimized with xvnegsp + vabsduw
17464  if (N->getOperand(0).getValueType() == MVT::v4i32 &&
17465  N->getOperand(0).hasOneUse()) {
17466  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
17467  N->getOperand(0)->getOperand(0),
17468  N->getOperand(0)->getOperand(1),
17469  DAG.getTargetConstant(1, dl, MVT::i32));
17470  }
17471  }
17472 
17473  return SDValue();
17474 }
17475 
17476 // For type v4i32/v8ii16/v16i8, transform
17477 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
17478 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
17479 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
17480 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
17481 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
17482  DAGCombinerInfo &DCI) const {
17483  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
17484  assert(Subtarget.hasP9Altivec() &&
17485  "Only combine this when P9 altivec supported!");
17486 
17487  SelectionDAG &DAG = DCI.DAG;
17488  SDLoc dl(N);
17489  SDValue Cond = N->getOperand(0);
17490  SDValue TrueOpnd = N->getOperand(1);
17491  SDValue FalseOpnd = N->getOperand(2);
17492  EVT VT = N->getOperand(1).getValueType();
17493 
17494  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
17495  FalseOpnd.getOpcode() != ISD::SUB)
17496  return SDValue();
17497 
17498  // ABSD only available for type v4i32/v8i16/v16i8
17499  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17500  return SDValue();
17501 
17502  // At least to save one more dependent computation
17503  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17504  return SDValue();
17505 
17506  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17507 
17508  // Can only handle unsigned comparison here
17509  switch (CC) {
17510  default:
17511  return SDValue();
17512  case ISD::SETUGT:
17513  case ISD::SETUGE:
17514  break;
17515  case ISD::SETULT:
17516  case ISD::SETULE:
17517  std::swap(TrueOpnd, FalseOpnd);
17518  break;
17519  }
17520 
17521  SDValue CmpOpnd1 = Cond.getOperand(0);
17522  SDValue CmpOpnd2 = Cond.getOperand(1);
17523 
17524  // SETCC CmpOpnd1 CmpOpnd2 cond
17525  // TrueOpnd = CmpOpnd1 - CmpOpnd2
17526  // FalseOpnd = CmpOpnd2 - CmpOpnd1
17527  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17528  TrueOpnd.getOperand(1) == CmpOpnd2 &&
17529  FalseOpnd.getOperand(0) == CmpOpnd2 &&
17530  FalseOpnd.getOperand(1) == CmpOpnd1) {
17531  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17532  CmpOpnd1, CmpOpnd2,
17533  DAG.getTargetConstant(0, dl, MVT::i32));
17534  }
17535 
17536  return SDValue();
17537 }
17538 
17539 /// getAddrModeForFlags - Based on the set of address flags, select the most
17540 /// optimal instruction format to match by.
17541 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17542  // This is not a node we should be handling here.
17543  if (Flags == PPC::MOF_None)
17544  return PPC::AM_None;
17545  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17546  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17547  if ((Flags & FlagSet) == FlagSet)
17548  return PPC::AM_DForm;
17549  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17550  if ((Flags & FlagSet) == FlagSet)
17551  return PPC::AM_DSForm;
17552  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17553  if ((Flags & FlagSet) == FlagSet)
17554  return PPC::AM_DQForm;
17555  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17556  if ((Flags & FlagSet) == FlagSet)
17557  return PPC::AM_PrefixDForm;
17558  // If no other forms are selected, return an X-Form as it is the most
17559  // general addressing mode.
17560  return PPC::AM_XForm;
17561 }
17562 
17563 /// Set alignment flags based on whether or not the Frame Index is aligned.
17564 /// Utilized when computing flags for address computation when selecting
17565 /// load and store instructions.
17566 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17567  SelectionDAG &DAG) {
17568  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17569  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17570  if (!FI)
17571  return;
17572  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17573  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17574  // If this is (add $FI, $S16Imm), the alignment flags are already set
17575  // based on the immediate. We just need to clear the alignment flags
17576  // if the FI alignment is weaker.
17577  if ((FrameIndexAlign % 4) != 0)
17578  FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17579  if ((FrameIndexAlign % 16) != 0)
17580  FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17581  // If the address is a plain FrameIndex, set alignment flags based on
17582  // FI alignment.
17583  if (!IsAdd) {
17584  if ((FrameIndexAlign % 4) == 0)
17585  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17586  if ((FrameIndexAlign % 16) == 0)
17587  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17588  }
17589 }
17590 
17591 /// Given a node, compute flags that are used for address computation when
17592 /// selecting load and store instructions. The flags computed are stored in
17593 /// FlagSet. This function takes into account whether the node is a constant,
17594 /// an ADD, OR, or a constant, and computes the address flags accordingly.
17595 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17596  SelectionDAG &DAG) {
17597  // Set the alignment flags for the node depending on if the node is
17598  // 4-byte or 16-byte aligned.
17599  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17600  if ((Imm & 0x3) == 0)
17601  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17602  if ((Imm & 0xf) == 0)
17603  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17604  };
17605 
17606  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17607  // All 32-bit constants can be computed as LIS + Disp.
17608  const APInt &ConstImm = CN->getAPIntValue();
17609  if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17610  FlagSet |= PPC::MOF_AddrIsSImm32;
17611  SetAlignFlagsForImm(ConstImm.getZExtValue());
17612  setAlignFlagsForFI(N, FlagSet, DAG);
17613  }
17614  if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17615  FlagSet |= PPC::MOF_RPlusSImm34;
17616  else // Let constant materialization handle large constants.
17617  FlagSet |= PPC::MOF_NotAddNorCst;
17618  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17619  // This address can be represented as an addition of:
17620  // - Register + Imm16 (possibly a multiple of 4/16)
17621  // - Register + Imm34
17622  // - Register + PPCISD::Lo
17623  // - Register + Register
17624  // In any case, we won't have to match this as Base + Zero.
17625  SDValue RHS = N.getOperand(1);
17626  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
17627  const APInt &ConstImm = CN->getAPIntValue();
17628  if (ConstImm.isSignedIntN(16)) {
17629  FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
17630  SetAlignFlagsForImm(ConstImm.getZExtValue());
17631  setAlignFlagsForFI(N, FlagSet, DAG);
17632  }
17633  if (ConstImm.isSignedIntN(34))
17634  FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
17635  else
17636  FlagSet |= PPC::MOF_RPlusR; // Register.
17637  } else if (RHS.getOpcode() == PPCISD::Lo &&
17638  !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
17639  FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
17640  else
17641  FlagSet |= PPC::MOF_RPlusR;
17642  } else { // The address computation is not a constant or an addition.
17643  setAlignFlagsForFI(N, FlagSet, DAG);
17644  FlagSet |= PPC::MOF_NotAddNorCst;
17645  }
17646 }
17647 
17648 static bool isPCRelNode(SDValue N) {
17649  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
17650  isValidPCRelNode<ConstantPoolSDNode>(N) ||
17651  isValidPCRelNode<GlobalAddressSDNode>(N) ||
17652  isValidPCRelNode<JumpTableSDNode>(N) ||
17653  isValidPCRelNode<BlockAddressSDNode>(N));
17654 }
17655 
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
  else {
    FlagSet |= PPC::MOF_SubtargetP9;
    if (Subtarget.hasPrefixInstrs())
      FlagSet |= PPC::MOF_SubtargetP10;
  }
  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
    return FlagSet;

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      // lxvp takes the address as operand 2; stxvp as operand 3 (operand 2
      // is the value being stored).
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(2)
                             : Parent->getOperand(3);
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
    else
      llvm_unreachable("Not expecting illegal scalar floats!");
  }

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
      break;
    case ISD::EXTLOAD:
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
      break;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
      break;
    }
  } else
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;
  }

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  // NOTE(review): the initializer below appears truncated in this copy —
  // the mask expression preceding "FlagSet)" is missing. Verify against
  // upstream LLVM.
  bool IsNonP1034BitConst =
      FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
17772 
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
// NOTE(review): the line declaring this definition (presumably
// PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N,
// SDValue &Disp, ...) and the initialization of `Mode` are missing from
// this copy — verify against upstream.
                                                  SDValue &Base,
                                                  SelectionDAG &DAG) const {

  int16_t ForceXFormImm = 0;
  // A provably-disjoint OR whose RHS is not a signed 16-bit immediate can be
  // treated as an add and matched directly as [r+r].
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // Otherwise, use R0 as the base register.
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Base = N;

  return Mode;
}
17808 
// NOTE(review): the line declaring this definition (presumably
// bool PPCTargetLowering::splitValueIntoRegisterParts(...) is missing from
// this copy — verify against upstream. Returns true when the split was
// handled here, false to fall back to the generic splitting logic.
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  EVT ValVT = Val.getValueType();
  // If we are splitting a scalar integer into f64 parts (i.e. so they
  // can be placed into VFRC registers), we need to zero extend and
  // bitcast the values. This will ensure the value is placed into a
  // VSR using direct moves or stack operations as needed.
  if (PartVT == MVT::f64 &&
      (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
    Parts[0] = Val;
    return true;
  }
  return false;
}
17826 
// Lower Op into a call to the named external library routine, forwarding
// Op's operands as the call arguments and returning the call's result.
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // NOTE(review): a declaration (presumably of the CallLoweringInfo `CLI`
  // used below) appears to be missing from this copy — verify upstream.
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
  // NOTE(review): declarations of the argument list (`Args`) and the entry
  // being filled in (`Entry`) appear to be missing from this copy.
  // Build one argument entry per operand of Op, with the target's libcall
  // sign/zero-extension convention per argument type.
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = N;
    Entry.Ty = ArgTy;
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  // Emit a tail call only when the node is in tail position and the return
  // type matches the caller's (or the caller returns void).
  bool isTailCall =
      TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(SDLoc(Op))
      .setChain(InChain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
  // NOTE(review): the end of this builder chain (and its terminating
  // semicolon) appears to be missing from this copy.
  return TLI.LowerCallTo(CLI).first;
}
17865 
17866 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
17867  const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
17868  SelectionDAG &DAG) const {
17869  if (Op.getValueType() == MVT::f32)
17870  return lowerToLibCall(LibCallFloatName, Op, DAG);
17871 
17872  if (Op.getValueType() == MVT::f64)
17873  return lowerToLibCall(LibCallDoubleName, Op, DAG);
17874 
17875  return SDValue();
17876 }
17877 
17878 bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
17879  SDNodeFlags Flags = Op.getNode()->getFlags();
17880  return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
17881  Flags.hasNoNaNs() && Flags.hasNoInfs();
17882 }
17883 
17884 bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
17885  return Op.getNode()->getFlags().hasApproximateFuncs();
17886 }
17887 
// Whether scalar math calls may be converted to MASS library routines.
// NOTE(review): the body of this function appears to be missing from this
// copy (the visible definition is empty) — verify against upstream.
bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
}
17891 
17892 SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
17893  const char *LibCallFloatName,
17894  const char *LibCallDoubleNameFinite,
17895  const char *LibCallFloatNameFinite,
17896  SDValue Op,
17897  SelectionDAG &DAG) const {
17898  if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
17899  return SDValue();
17900 
17901  if (!isLowringToMASSFiniteSafe(Op))
17902  return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
17903  DAG);
17904 
17905  return lowerLibCallBasedOnType(LibCallFloatNameFinite,
17906  LibCallDoubleNameFinite, Op, DAG);
17907 }
17908 
17909 SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
17910  return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
17911  "__xl_powf_finite", Op, DAG);
17912 }
17913 
17914 SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
17915  return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
17916  "__xl_sinf_finite", Op, DAG);
17917 }
17918 
17919 SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
17920  return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
17921  "__xl_cosf_finite", Op, DAG);
17922 }
17923 
17924 SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
17925  return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
17926  "__xl_logf_finite", Op, DAG);
17927 }
17928 
17929 SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
17930  return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
17931  "__xl_log10f_finite", Op, DAG);
17932 }
17933 
17934 SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
17935  return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
17936  "__xl_expf_finite", Op, DAG);
17937 }
17938 
17939 // If we happen to match to an aligned D-Form, check if the Frame Index is
17940 // adequately aligned. If it is not, reset the mode to match to X-Form.
17941 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
17942  PPC::AddrMode &Mode) {
17943  if (!isa<FrameIndexSDNode>(N))
17944  return;
17945  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
17946  (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
17947  Mode = PPC::AM_XForm;
17948 }
17949 
/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
// NOTE(review): the first line of this definition (presumably
// PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode
// *Parent, ...) is missing from this copy — verify against upstream.
                                              SDValue N, SDValue &Disp,
                                              SDValue &Base,
                                              SelectionDAG &DAG,
                                              MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
        // A frame-index base must be converted to a target FI and the
        // containing function fixed up for FI-based addressing.
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(1).getOperand(0); // The global address.
      // NOTE(review): the opening line(s) of an assert over Disp's opcode
      // appear to be missing from this copy — the two lines below are its
      // trailing operands. Verify against upstream.
          Disp.getOpcode() == ISD::TargetConstantPool ||
          Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
          (!Align || isAligned(*Align, CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp.
        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
        Base =
            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
        break;
      }
    }
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
      // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      else
        Base = N.getOperand(0);
    } else if (isIntS34Immediate(N, Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
    Base = FI ? N : N.getOperand(1);
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                N.getValueType())
              : N.getOperand(0);
    break;
  }
  }
  return Mode;
}
18087 
// NOTE(review): the opening signature line (Doxygen line 18088) is missing from
// this extraction, so the function name is not visible here; by the parameter
// list (CallingConv::ID switch, Return/IsVarArg flags) this is the selector
// returning the CCAssignFn for 64-bit ELF calls — TODO confirm against the
// original PPCISelLowering.cpp.
// Visible behavior: only the Cold calling convention is special-cased, and
// only for return values (RetCC_PPC_Cold); cold argument passing and every
// other convention fall through to CC_PPC64_ELF_FIS. IsVarArg is not consulted
// in the visible body.
18089  bool Return,
18090  bool IsVarArg) const {
18091  switch (CC) {
18092  case CallingConv::Cold:
// Cold calls: dedicated return convention, standard argument convention.
18093  return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
18094  default:
18095  return CC_PPC64_ELF_FIS;
18096  }
18097 }
18098 
// NOTE(review): the signature line (Doxygen line 18099) is missing from this
// extraction. This predicate is called by the atomic-expansion hooks below
// (see lines 18110 and 18118) to gate inlining of 128-bit atomics.
// Inlining quadword atomics requires all of: a 64-bit subtarget, hardware
// quadword atomic support, and either the explicit EnableQuadwordAtomics
// override or a non-AIX target OS.
18100  // TODO: 16-byte atomic type support for AIX is in progress; we should be able
18101  // to inline 16-byte atomic ops on AIX too in the future.
18102  return Subtarget.isPPC64() &&
18103  (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
18104  Subtarget.hasQuadwordAtomics();
18105 }
18106 
// NOTE(review): this extraction is incomplete — the signature line and both
// return statements (Doxygen lines 18107-18108 and 18111-18112) were dropped,
// so the body below is not compilable as shown. Per this page's own
// cross-reference index, this is PPCTargetLowering::shouldExpandAtomicRMWInIR
// (defined at PPCISelLowering.cpp:18108), which tells AtomicExpandPass how to
// expand an AtomicRMW instruction.
// Visible logic: 128-bit RMW operations are special-cased when quadword
// atomics can be inlined; the dropped lines carried the actual
// AtomicExpansionKind return values — recover them from the original source.
18109  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18110  if (shouldInlineQuadwordAtomics() && Size == 128)
18113 }
18114 
// NOTE(review): this extraction is incomplete — the signature line and both
// return statements (Doxygen lines 18115-18116 and 18119-18120) were dropped,
// so the body below is not compilable as shown. The getNewValOperand() call
// indicates the parameter is an AtomicCmpXchgInst, so this is presumably the
// shouldExpandAtomicCmpXchgInIR hook — TODO confirm against the original.
// Visible logic: 128-bit cmpxchg is special-cased when quadword atomics can
// be inlined; the dropped lines carried the AtomicExpansionKind returns.
18117  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18118  if (shouldInlineQuadwordAtomics() && Size == 128)
18121 }
18122 
// Maps an AtomicRMW binary operation to the corresponding PowerPC 128-bit
// atomicrmw intrinsic. Only the operations listed below have an i128
// intrinsic; any other BinOp reaching this function is a lowering bug, hence
// the llvm_unreachable on the default path.
// NOTE(review): the line carrying the function name and parameter (Doxygen
// line 18124) is missing from this extraction; per this page's cross-reference
// index the full signature is:
//   static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
18123 static Intrinsic::ID
18125  switch (BinOp) {
18126  default:
18127  llvm_unreachable("Unexpected AtomicRMW BinOp");
18128  case AtomicRMWInst::Xchg:
18129  return Intrinsic::ppc_atomicrmw_xchg_i128;
18130  case AtomicRMWInst::Add:
18131  return Intrinsic::ppc_atomicrmw_add_i128;
18132  case AtomicRMWInst::Sub:
18133  return Intrinsic::ppc_atomicrmw_sub_i128;
18134  case AtomicRMWInst::And:
18135  return Intrinsic::ppc_atomicrmw_and_i128;
18136  case AtomicRMWInst::Or:
18137  return Intrinsic::ppc_atomicrmw_or_i128;
18138  case AtomicRMWInst::Xor:
18139  return Intrinsic::ppc_atomicrmw_xor_i128;
18140  case AtomicRMWInst::Nand:
18141  return Intrinsic::ppc_atomicrmw_nand_i128;
18142  }
18143 }
18144 
// NOTE(review): the signature line (Doxygen line 18145) and the declaration of
// `RMW` (lines 18152-18153, which per the surrounding code presumably obtained
// the per-op i128 intrinsic via Intrinsic::getDeclaration and
// getIntrinsicForAtomicRMWBinOp128(AI->getOperation()) — confirm against the
// original) are missing from this extraction, so `RMW` at line 18160 appears
// undeclared below.
// Visible behavior: emits an inline 128-bit atomicrmw by splitting the i128
// increment into two i64 halves, calling the quadword RMW intrinsic with
// (addr, lo, hi), and reassembling the returned {lo, hi} pair into an i128.
18146  IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18147  Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18148  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18149  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18150  Type *ValTy = Incr->getType();
18151  assert(ValTy->getPrimitiveSizeInBits() == 128);
// Split the 128-bit increment into low/high 64-bit halves for the intrinsic.
18154  Type *Int64Ty = Type::getInt64Ty(M->getContext());
18155  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18156  Value *IncrHi =
18157  Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18158  Value *Addr =
18159  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18160  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
// Reassemble the {lo, hi} struct returned by the intrinsic into one i128:
// zext each half to 128 bits, shift the high half left by 64, and OR.
18161  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18162  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18163  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18164  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18165  return Builder.CreateOr(
18166  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18167 }
18168 
// NOTE(review): the opening signature line (Doxygen line 18169) is missing
// from this extraction; by the parameter list this is the masked cmpxchg
// emission hook — TODO confirm the exact name against the original source.
// Visible behavior: emits an inline 128-bit compare-and-exchange by splitting
// both the expected and new i128 values into i64 halves, calling the
// ppc_cmpxchg_i128 intrinsic bracketed by the target's leading/trailing
// fences for the requested ordering, and reassembling the returned {lo, hi}
// pair into an i128 result.
18170  IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18171  Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18172  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18173  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18174  Type *ValTy = CmpVal->getType();
18175  assert(ValTy->getPrimitiveSizeInBits() == 128);
18176  Function *IntCmpXchg =
18177  Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
// Split the 128-bit compare and new values into low/high 64-bit halves.
18178  Type *Int64Ty = Type::getInt64Ty(M->getContext());
18179  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18180  Value *CmpHi =
18181  Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18182  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18183  Value *NewHi =
18184  Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18185  Value *Addr =
18186  Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
// Fence placement: the intrinsic call itself is bracketed by the ordering
// fences; the fences are emitted here rather than inside the intrinsic.
18187  emitLeadingFence(Builder, CI, Ord);
18188  Value *LoHi =
18189  Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18190  emitTrailingFence(Builder, CI, Ord);
// Reassemble the {lo, hi} result into a single i128 value.
18191  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18192  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18193  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18194  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18195  return Builder.CreateOr(
18196  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18197 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::PPCISD::READ_TIME_BASE
@ READ_TIME_BASE
Definition: PPCISelLowering.h:267
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
llvm::ISD::FPOWI
@ FPOWI
Definition: ISDOpcodes.h:917
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:151
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
Definition: SelectionDAG.cpp:7166
llvm::PPCRegisterInfo
Definition: PPCRegisterInfo.h:57
i
i
Definition: README.txt:29
llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:464
llvm::PPCII::MO_TLSGD_FLAG
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:127
llvm::PPCISD::MTCTR
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
Definition: PPCISelLowering.h:192
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1437
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:436
llvm::PPCII::MO_GOT_FLAG
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition: PPC.h:118
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3338
llvm::PPCSubtarget::useCRBits
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:259
llvm::CCValAssign::ZExt
@ ZExt
Definition: CallingConvLower.h:36
ValueTypes.h
CmpMode::FP
@ FP
llvm::APFloat::isDenormal
bool isDenormal() const
Definition: APFloat.h:1216
llvm::PPCSubtarget::hasRecipPrec
bool hasRecipPrec() const
Definition: PPCSubtarget.h:271
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::PPC::isXXINSERTWMask
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
Definition: PPCISelLowering.cpp:2215
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1569
Lowering
Shadow Stack GC Lowering
Definition: ShadowStackGCLowering.cpp:98
DisableAutoPairedVecSt
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
llvm::PPCTargetLowering::getPICJumpTableRelocBase
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Definition: PPCISelLowering.cpp:3181
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2374
llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:475
llvm::RISCVAttrs::StackAlign
StackAlign
Definition: RISCVAttributes.h:37
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::isConstOrConstSplat
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition: SelectionDAG.cpp:10667
llvm::PPCSubtarget::hasFloat128
bool hasFloat128() const
Definition: PPCSubtarget.h:335
llvm::PPC::DIR_PWR9
@ DIR_PWR9
Definition: PPCSubtarget.h:62
llvm::PPC::DIR_PWR6X
@ DIR_PWR6X
Definition: PPCSubtarget.h:59
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2240
llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:586
llvm::PPCSubtarget::hasPOPCNTD
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:359
llvm::SDValue::dump
void dump() const
Definition: SelectionDAGNodes.h:1178
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4710
llvm::TargetLoweringBase::AddPromotedToType
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Definition: TargetLowering.h:2428
llvm::XCOFF::XTY_ER
@ XTY_ER
External reference.
Definition: XCOFF.h:226
llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:943
llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1457
llvm::ISD::ArgFlagsTy::isSplit
bool isSplit() const
Definition: TargetCallingConv.h:132
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:146
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1448
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1101
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1433
llvm::SDUse
Represents a use of a SDNode.
Definition: SelectionDAGNodes.h:277
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1373
llvm::PPCII::MO_PLT
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:105
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm::PPCSubtarget::getRegisterInfo
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:224
llvm::PPCTargetLowering::getJumpTableEncoding
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
Definition: PPCISelLowering.cpp:3166
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::PPC::isXXSLDWIShuffleMask
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
Definition: PPCISelLowering.cpp:2290
llvm::TargetLowering::getSqrtResultForDenormInput
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
Definition: TargetLowering.h:4648
llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition: TargetOptions.h:221
llvm::ISD::JumpTable
@ JumpTable
Definition: ISDOpcodes.h:81
llvm::PPC::PRED_LT
@ PRED_LT
Definition: PPCPredicates.h:27
llvm::PPCFunctionInfo::appendParameterType
void appendParameterType(ParamType Type)
Definition: PPCMachineFunctionInfo.cpp:75
llvm::PPCISD::FCTIDZ
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
Definition: PPCISelLowering.h:72
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: MachineValueType.h:1087
llvm::CC_PPC32_SVR4_VarArg
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::PATCHPOINT
@ PATCHPOINT
Definition: ISDOpcodes.h:1299
llvm::MachineFrameInfo::hasVAStart
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
Definition: MachineFrameInfo.h:622
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:971
llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
llvm::MCSectionXCOFF
Definition: MCSectionXCOFF.h:32
llvm::SectionKind::getMetadata
static SectionKind getMetadata()
Definition: SectionKind.h:188
llvm::PICLevel::SmallPIC
@ SmallPIC
Definition: CodeGen.h:33
llvm::PPCISD::FSQRT
@ FSQRT
Square root instruction.
Definition: PPCISelLowering.h:96
CalculateTailCallArgDest
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
Definition: PPCISelLowering.cpp:5022
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1091
llvm::PPCSubtarget::usesFunctionDescriptors
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:400
llvm::CCValAssign::Full
@ Full
Definition: CallingConvLower.h:34
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
PPCRegisterInfo.h
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:800
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::ISD::FLT_ROUNDS_
@ FLT_ROUNDS_
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:863
llvm::PPCSubtarget::hasPrefixInstrs
bool hasPrefixInstrs() const
Definition: PPCSubtarget.h:288
llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: MachineValueType.h:358
llvm::TargetLowering::CallLoweringInfo::IsPatchPoint
bool IsPatchPoint
Definition: TargetLowering.h:4030
llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition: TargetMachine.cpp:146
llvm::PPCISD::BDNZ
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
Definition: PPCISelLowering.h:296
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:43
type
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
EmitTailCallStoreFPAndRetAddr
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
Definition: PPCISelLowering.cpp:4998
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:205
llvm::ISD::SETGT
@ SETGT
Definition: ISDOpcodes.h:1445
llvm::PPCISD::RFEBB
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
Definition: PPCISelLowering.h:428
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:886
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1421
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1449
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::PPC::isXXBRDShuffleMask
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
Definition: PPCISelLowering.cpp:2373
llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition: TargetLowering.cpp:5518
llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition: MachineFrameInfo.h:377
llvm::TargetLowering::ConstraintType
ConstraintType
Definition: TargetLowering.h:4430
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:991
llvm::MachineModuleInfo::getContext
const MCContext & getContext() const
Definition: MachineModuleInfo.h:143
llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition: SelectionDAG.h:2171
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition: SelectionDAGNodes.h:1583
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1404
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::PPCFunctionInfo::setTailCallSPDelta
void setTailCallSPDelta(int size)
Definition: PPCMachineFunctionInfo.h:184
AtomicOrdering.h
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:189
llvm::PPCTargetLowering::getPreIndexedAddressParts
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Definition: PPCISelLowering.cpp:2982
llvm::APInt::isSignedIntN
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:420
llvm::PPCFunctionInfo::getMinReservedArea
unsigned getMinReservedArea() const
Definition: PPCMachineFunctionInfo.h:180
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:750
llvm::ISD::AssertSext
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
llvm::PPCISD::FP_EXTEND_HALF
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
Definition: PPCISelLowering.h:458
llvm::PPC::AM_PrefixDForm
@ AM_PrefixDForm
Definition: PPCISelLowering.h:736
llvm::ISD::STRICT_FMAXNUM
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:423
llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:348
llvm::PPCISD::VABSD
@ VABSD
An SDNode for Power9 vector absolute value difference.
Definition: PPCISelLowering.h:454
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::PPCSubtarget::hasMMA
bool hasMMA() const
Definition: PPCSubtarget.h:290
llvm::HexagonISD::JT
@ JT
Definition: HexagonISelLowering.h:52
llvm::MVT::ppcf128
@ ppcf128
Definition: MachineValueType.h:61
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::isIntS34Immediate
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
Definition: PPCISelLowering.cpp:2622
llvm::PPC::DIR_E500mc
@ DIR_E500mc
Definition: PPCSubtarget.h:52
llvm::PPC::DIR_A2
@ DIR_A2
Definition: PPCSubtarget.h:50
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::Function
Definition: Function.h:60
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
llvm::PPCISD::CLRBHRB
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
Definition: PPCISelLowering.h:421
llvm::PPCII::MO_GOT_TLSLD_PCREL_FLAG
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:149
StringRef.h
llvm::ISD::ArgFlagsTy::isInConsecutiveRegsLast
bool isInConsecutiveRegsLast() const
Definition: TargetCallingConv.h:127
llvm::PPC::DIR_970
@ DIR_970
Definition: PPCSubtarget.h:49
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::ISD::STRICT_UINT_TO_FP
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:449
llvm::PPC::PRED_GE
@ PRED_GE
Definition: PPCPredicates.h:30
llvm::InlineAsm::Kind_RegDefEarlyClobber
@ Kind_RegDefEarlyClobber
Definition: InlineAsm.h:240
computeFlagsForAddressComputation
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
Definition: PPCISelLowering.cpp:17595
llvm::PPCFunctionInfo::getVarArgsFrameIndex
int getVarArgsFrameIndex() const
Definition: PPCMachineFunctionInfo.h:224
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1018
llvm::ISD::STRICT_FMINNUM
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:424
llvm::MVT::i128
@ i128
Definition: MachineValueType.h:50
llvm::PPCFunctionInfo::VectorChar
@ VectorChar
Definition: PPCMachineFunctionInfo.h:30
llvm::PPC::AM_DForm
@ AM_DForm
Definition: PPCISelLowering.h:733
llvm::ISD::DYNAMIC_STACKALLOC
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:976
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:741
DisablePPCPreinc
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1798
llvm::CCState::addLoc
void addLoc(const CCValAssign &V)
Definition: CallingConvLower.h:251
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:425
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
DM
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
OP_COPY
@ OP_COPY
Definition: ARMISelLowering.cpp:8264
llvm::PPC::MOF_SubtargetP9
@ MOF_SubtargetP9
Definition: PPCISelLowering.h:725
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:145
llvm::ISD::INIT_TRAMPOLINE
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1124
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:314
llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition: TargetLowering.h:3336
llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition: CodeGen.h:43
llvm::PPCTargetLowering::shouldExpandAtomicRMWInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: PPCISelLowering.cpp:18108
llvm::MipsISD::LDL
@ LDL
Definition: MipsISelLowering.h:252
llvm::ISD::FSHL
@ FSHL
Definition: ISDOpcodes.h:696
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::AtomicRMWInst::getOperation
BinOp getOperation() const
Definition: Instructions.h:801
CC_AIX
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
Definition: PPCISelLowering.cpp:6582
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:727
llvm::PPCTargetLowering::SelectAddressRegReg
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
Definition: PPCISelLowering.cpp:2639
llvm::SelectionDAG::getFrameIndex
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition: SelectionDAG.cpp:1679
uses
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the uses
Definition: README-SSE.txt:258
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::TargetLoweringBase::isOperationCustom
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
Definition: TargetLowering.h:1202
Statistic.h
getIntrinsicForAtomicRMWBinOp128
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
Definition: PPCISelLowering.cpp:18124
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1444
llvm::SDNodeFlags::hasNoNaNs
bool hasNoNaNs() const
Definition: SelectionDAGNodes.h:427
llvm::MVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: MachineValueType.h:374
llvm::PPCTargetLowering::isAccessedAsGotIndirect
bool isAccessedAsGotIndirect(SDValue N) const
Definition: PPCISelLowering.cpp:16417
llvm::ISD::STACKRESTORE
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1057
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:9214
llvm::PPCISD::MTVSRA
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
Definition: PPCISelLowering.h:223
llvm::PPCISD::SEXT_LD_SPLAT
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
Definition: PPCISelLowering.h:576
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:454
DisablePerfectShuffle
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::PPCSubtarget::hasP8Vector
bool hasP8Vector() const
Definition: PPCSubtarget.h:282
llvm::PPC::DIR_PWR4
@ DIR_PWR4
Definition: PPCSubtarget.h:55
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::PPC::AM_None
@ AM_None
Definition: PPCISelLowering.h:732
llvm::SelectionDAG::isSplatValue
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
Definition: SelectionDAG.cpp:2545
ErrorHandling.h
llvm::Sched::ILP
@ ILP
Definition: TargetLowering.h:102
llvm::PPCSubtarget::isLittleEndian
bool isLittleEndian() const
Definition: PPCSubtarget.h:262
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::PPC::MOF_NotAddNorCst
@ MOF_NotAddNorCst
Definition: PPCISelLowering.h:705
llvm::PPCFunctionInfo::addLiveInAttr
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
Definition: PPCMachineFunctionInfo.h:248
llvm::PPCISD::MAT_PCREL_ADDR
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
Definition: PPCISelLowering.h:463
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:144
llvm::PPCISD::ACC_BUILD
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
Definition: PPCISelLowering.h:476
llvm::PPCTargetLowering::expandVSXLoadForLE
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
Definition: PPCISelLowering.cpp:14557
llvm::XCOFF::XMC_PR
@ XMC_PR
Program Code.
Definition: XCOFF.h:90
llvm::TargetLowering::lowerCmpEqZeroToCtlzSrl
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:8795
llvm::PPCSubtarget::getFrameLowering
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:214
llvm::PPCCCState
Definition: PPCCCState.h:19
llvm::PPCSubtarget::hasVSX
bool hasVSX() const
Definition: PPCSubtarget.h:280
llvm::GlobalAlias
Definition: GlobalAlias.h:28
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1342
llvm::PPCSubtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:361
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:920
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1365
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:766
llvm::TargetLowering::getPICJumpTableRelocBase
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
Definition: TargetLowering.cpp:453
llvm::PPCInstrInfo
Definition: PPCInstrInfo.h:191
llvm::PPCISD::VPERM
@ VPERM
VPERM - The PPC VPERM Instruction.
Definition: PPCISelLowering.h:100
llvm::ISD::USUBSAT
@ USUBSAT
Definition: ISDOpcodes.h:350
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
LowerLabelRef
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:3082
llvm::PPCTargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Definition: PPCISelLowering.cpp:16575
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
MachineBasicBlock.h
llvm::GlobalAddressSDNode::getTargetFlags
unsigned getTargetFlags() const
Definition: SelectionDAGNodes.h:1753
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:819
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3815
llvm::PPC::MOF_RPlusSImm16Mult16
@ MOF_RPlusSImm16Mult16
Definition: PPCISelLowering.h:709
llvm::PPCISD::STXSIX
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
Definition: PPCISelLowering.h:544
llvm::TargetLoweringBase::getPrefLoopAlignment
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
Definition: TargetLoweringBase.cpp:2037
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition: SelectionDAG.cpp:10086
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::PPCTargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
Definition: PPCISelLowering.cpp:16449
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2324
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:270
llvm::MemOp
Definition: TargetLowering.h:111
llvm::PPCTargetLowering::getSingleConstraintMatchWeight
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
Definition: PPCISelLowering.cpp:16020
llvm::PPC::isSplatShuffleMask
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
Definition: PPCISelLowering.cpp:2143
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:734
R4
#define R4(n)
llvm::TargetLoweringBase::shouldExpandAtomicRMWInIR
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: TargetLowering.h:2088
APInt.h
llvm::PPCISD::BUILD_SPE64
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
Definition: PPCISelLowering.h:235
areCallingConvEligibleForTCO_64SVR4
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
Definition: PPCISelLowering.cpp:4811
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::PPCSubtarget::hasP9Vector
bool hasP9Vector() const
Definition: PPCSubtarget.h:285
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition: TargetLowering.cpp:45
llvm::CallBase::isStrictFP
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1845
llvm::TargetLowering::C_Memory
@ C_Memory
Definition: TargetLowering.h:4433
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:774
llvm::ISD::ArgFlagsTy::isZExt
bool isZExt() const
Definition: TargetCallingConv.h:73
Shift
bool Shift
Definition: README.txt:468
DisablePPCUnaligned
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1439
llvm::PPCFrameLowering::getTOCSaveOffset
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
Definition: PPCFrameLowering.cpp:2678
llvm::PPC::isXXPERMDIShuffleMask
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
Definition: PPCISelLowering.cpp:2389
MachineJumpTableInfo.h
llvm::TargetMachine::getRelocationModel
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
Definition: TargetMachine.cpp:68
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
llvm::PPCISD::FADDRTZ
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
Definition: PPCISelLowering.h:302
DenseMap.h
llvm::KnownBits::getConstant
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition: BranchProbability.h:49
Module.h
llvm::PPCISD::Lo
@ Lo
Definition: PPCISelLowering.h:136
llvm::PPCII::MO_LO
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:160
llvm::PPCTargetLowering::ccAssignFnForCall
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
Definition: PPCISelLowering.cpp:18088
llvm::PPCTargetLowering::getByValTypeAlignment
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
Definition: PPCISelLowering.cpp:1585
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:749
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::AttributeList
Definition: Attributes.h:425
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
TargetInstrInfo.h
llvm::FloatToBits
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:690
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1367
llvm::PPCSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Scheduling customization.
Definition: PPCSubtarget.cpp:203
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7874
llvm::PPCISD::XXSPLT
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
Definition: PPCISelLowering.h:104
llvm::PPCISD::LD_GOT_TPREL_L
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Definition: PPCISelLowering.h:335
llvm::PPCISD::FCFIDU
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
Definition: PPCISelLowering.h:65
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:142
isConstantOrUndef
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
Definition: PPCISelLowering.cpp:1810
llvm::MachineRegisterInfo::getLiveInVirtReg
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
Definition: MachineRegisterInfo.cpp:450
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4581
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:872
llvm::PPCISD::STRICT_FCTIDZ
@ STRICT_FCTIDZ
Definition: PPCISelLowering.h:491
llvm::PPCISD::FNMSUB
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
Definition: PPCISelLowering.h:170
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:736
mapArgRegToOffsetAIX
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
Definition: PPCISelLowering.cpp:6855
llvm::PPCTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: PPCISelLowering.cpp:15880
llvm::PPCISD::XXSPLTI32DX
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
Definition: PPCISelLowering.h:113
llvm::ISD::CALLSEQ_START
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1071
llvm::PPCII::MO_GOT_TLSGD_PCREL_FLAG
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combintaion of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:144
llvm::CCState::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: CallingConvLower.h:256
llvm::PPC::MOF_RPlusSImm34
@ MOF_RPlusSImm34
Definition: PPCISelLowering.h:710
llvm::PPCFunctionInfo::setVarArgsNumGPR
void setVarArgsNumGPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:231
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:822
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
llvm::PPC::AM_DQForm
@ AM_DQForm
Definition: PPCISelLowering.h:735
llvm::CallBase::isMustTailCall
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition: Instructions.cpp:298
llvm::PPCTargetLowering::emitEHSjLjSetJmp
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11760
llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition: TargetLowering.h:4047
llvm::Optional< CallingConv::ID >
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2450
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9628
FPR
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
Definition: PPCISelLowering.cpp:3871
llvm::PPCFunctionInfo
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
Definition: PPCMachineFunctionInfo.h:24
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1260
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::PPC::AM_DSForm
@ AM_DSForm
Definition: PPCISelLowering.h:734
llvm::PPCISD::DYNALLOC
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
Definition: PPCISelLowering.h:144
llvm::PPCSubtarget::getTargetMachine
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:227
llvm::PPCSubtarget::needsTwoConstNR
bool needsTwoConstNR() const
Definition: PPCSubtarget.h:281
llvm::TargetLoweringBase::isJumpTableRelative
virtual bool isJumpTableRelative() const
Definition: TargetLoweringBase.cpp:2033
llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:951
fixupShuffleMaskForPermutedSToV
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14797
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:848
llvm::TargetLoweringBase::getVectorIdxTy
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
Definition: TargetLowering.h:408
llvm::PPCTargetLowering::EmitPartwordAtomicBinary
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11545
llvm::SelectionDAG::getCommutedVectorShuffle
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
Definition: SelectionDAG.cpp:2051
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
getVectorCompareInfo
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
Definition: PPCISelLowering.cpp:10191
STLExtras.h
llvm::PPCFunctionInfo::VectorInt
@ VectorInt
Definition: PPCMachineFunctionInfo.h:32
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1086
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1316
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1404
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::PPCISD::MFBHRBE
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
Definition: PPCISelLowering.h:425
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1373
llvm::ArrayType
Class to represent array types.
Definition: DerivedTypes.h:357
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::PPC::PRED_GT
@ PRED_GT
Definition: PPCPredicates.h:31
llvm::PPCSubtarget::hasFSQRT
bool hasFSQRT() const
Definition: PPCSubtarget.h:266
llvm::TargetLoweringBase::shouldSignExtendTypeInLibCall
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
Definition: TargetLowering.h:2040
llvm::minidump::MemoryType
MemoryType
Definition: Minidump.h:98
stripModuloOnShift
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:16968
llvm::PPCTargetLowering::splitValueIntoRegisterParts
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Definition: PPCISelLowering.cpp:17809
llvm::BlockAddressSDNode
Definition: SelectionDAGNodes.h:2175
llvm::PPCSubtarget::getStackPointerRegister
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:430
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:178
Format.h
llvm::PPCSubtarget::hasFRSQRTES
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:270
getCallOpcode
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM, bool IsStrictFPCall=false)
Definition: PPCISelLowering.cpp:5222
llvm::PPCTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition: PPCISelLowering.cpp:11342
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
Definition: TargetLoweringBase.cpp:1157
llvm::PPCTargetLowering::CallFlags::IsPatchPoint
const bool IsPatchPoint
Definition: PPCISelLowering.h:1165
llvm::PPCISD::ZEXT_LD_SPLAT
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
Definition: PPCISelLowering.h:572
llvm::PPC::MOF_SubtargetP10
@ MOF_SubtargetP10
Definition: PPCISelLowering.h:726
SelectionDAG.h
llvm::PPCTargetLowering::shouldExpandBuildVectorWithShuffles
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
Definition: PPCISelLowering.cpp:16816
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:454
llvm::PPCISD::LBRX
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
Definition: PPCISelLowering.h:520
DisableILPPref
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
llvm::PPCSubtarget::is64BitELFABI
bool is64BitELFABI() const
Definition: PPCSubtarget.h:371
llvm::InlineAsm::Op_FirstOperand
@ Op_FirstOperand
Definition: InlineAsm.h:220
Use.h
llvm::PPCISD::FCFID
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
Definition: PPCISelLowering.h:61
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:442
llvm::InlineAsm::Kind_Clobber
@ Kind_Clobber
Definition: InlineAsm.h:241
llvm::PPCTargetLowering::expandVSXStoreForLE
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
Definition: PPCISelLowering.cpp:14623
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1435
llvm::PPC::isVPKUHUMShuffleMask
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Definition: PPCISelLowering.cpp:1820
CalculateStackSlotSize
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
Definition: PPCISelLowering.cpp:3877
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:661
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2370
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:462
llvm::convertToNonDenormSingle
bool convertToNonDenormSingle(APInt &ArgAPInt)
Definition: PPCISelLowering.cpp:9107
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:912
llvm::CC_PPC64_ELF_FIS
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::PPCISD::BDZ
@ BDZ
Definition: PPCISelLowering.h:297
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:213
llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition: MachineInstrBuilder.h:63
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::PPCTargetLowering::PPCTargetLowering
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
Definition: PPCISelLowering.cpp:153
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1001
llvm::ISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
MachineRegisterInfo.h
llvm::PPCISD::ADDI_TLSLD_L
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
Definition: PPCISelLowering.h:384
KnownBits.h
llvm::TargetLoweringBase::getShiftAmountTy
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
Definition: TargetLoweringBase.cpp:908
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1511
llvm::LoopBase::block_end
block_iterator block_end() const
Definition: LoopInfo.h:193
llvm::PPC::MOF_SubtargetSPE
@ MOF_SubtargetSPE
Definition: PPCISelLowering.h:727
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2061
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2353
llvm::XCOFF::CsectProperties
Definition: XCOFF.h:456
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
llvm::ISD::INLINEASM
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1025
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
fixupFuncForFI
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
Definition: PPCISelLowering.cpp:2695
llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition: ISDOpcodes.h:427
llvm::GlobalValue::getSection
StringRef getSection() const
Definition: Globals.cpp:171
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::TargetLowering::getNegatedExpression
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: TargetLowering.cpp:6660
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:694
llvm::TargetLoweringObjectFile
Definition: TargetLoweringObjectFile.h:45
llvm::PPC::isXXBRWShuffleMask
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
Definition: PPCISelLowering.cpp:2369
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::PPCISD::UINT_VEC_TO_FP
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
Definition: PPCISelLowering.h:248
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:917
llvm::PPCISD::ANDI_rec_1_GT_BIT
@ ANDI_rec_1_GT_BIT
Definition: PPCISelLowering.h:263
llvm::PPCTargetLowering::CallFlags::HasNest
const bool HasNest
Definition: PPCISelLowering.h:1167
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2442
llvm::CC_PPC32_SVR4_ByVal
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
llvm::TargetLowering::isInTailCallPosition
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
Definition: TargetLowering.cpp:51
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::TargetLowering::CallLoweringInfo::setSExtResult
CallLoweringInfo & setSExtResult(bool Value=true)
Definition: TargetLowering.h:4149
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1461
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:747
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
PPCSubtarget.h
CommandLine.h
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1404
llvm::PPCISD::STXVD2X
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:581
llvm::PPCII::MO_PCREL_FLAG
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:113
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition: ISDOpcodes.h:403
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1008
TargetLowering.h
llvm::PPCSubtarget::hasP8Altivec
bool hasP8Altivec() const
Definition: PPCSubtarget.h:283
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::BlockAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:2189
llvm::ISD::FSHR
@ FSHR
Definition: ISDOpcodes.h:697
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands, and they produce a value as well as a token chain.
Definition: SelectionDAG.cpp:7824
llvm::PPCISD::STRICT_FCTIDUZ
@ STRICT_FCTIDUZ
Definition: PPCISelLowering.h:493
llvm::PPCTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
Definition: PPCISelLowering.cpp:11133
llvm::PPCISD::STFIWX
@ STFIWX
STFIWX - The STFIWX instruction.
Definition: PPCISelLowering.h:524
llvm::LoopBase::getSubLoops
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:159
R2
#define R2(n)
llvm::PPC::MOF_RPlusLo
@ MOF_RPlusLo
Definition: PPCISelLowering.h:707
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:633
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:362
llvm::TargetFrameLowering::getStackAlignment
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned.
Definition: TargetFrameLowering.h:95
llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition: TargetLowering.h:4025
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:441
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:37
llvm::PPCISD::CR6SET
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
Definition: PPCISelLowering.h:315
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
Definition: TargetLowering.h:1161
llvm::PPCFrameLowering::getReturnSaveOffset
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
Definition: PPCFrameLowering.h:149
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::TargetOptions::NoInfsFPMath
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command line.
Definition: TargetOptions.h:169
EnsureStackAlignment
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required for the target.
Definition: PPCISelLowering.cpp:3983
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:728
llvm::ExternalSymbolSDNode
Definition: SelectionDAGNodes.h:2217
llvm::PPCTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: PPCISelLowering.cpp:16595
llvm::PPCISD::TC_RETURN
@ TC_RETURN
TC_RETURN - A tail call return.
Definition: PPCISelLowering.h:312
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:96
GlobalValue.h
llvm::PPCSubtarget::hasFPRND
bool hasFPRND() const
Definition: PPCSubtarget.h:274
llvm::PPCISD::STRICT_FCFIDUS
@ STRICT_FCFIDUS
Definition: PPCISelLowering.h:500
MachineLoopInfo.h
haveEfficientBuildVectorPattern
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
Definition: PPCISelLowering.cpp:9016
llvm::SelectionDAG::getTargetFrameIndex
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:703
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1126
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1280
llvm::PPCTargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR-canonical.
Definition: PPCISelLowering.cpp:1603
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1485
CalculateStackSlotUsed
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passed in registers).
Definition: PPCISelLowering.cpp:3935
llvm::AtomicCmpXchgInst::getNewValOperand
Value * getNewValOperand()
Definition: Instructions.h:647
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:31
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
isFunctionGlobalAddress
static bool isFunctionGlobalAddress(SDValue Callee)
Definition: PPCISelLowering.cpp:5119
llvm::PPCSubtarget::hasFRES
bool hasFRES() const
Definition: PPCSubtarget.h:268
CalculateStackSlotAlignment
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
Definition: PPCISelLowering.cpp:3893
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:418
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
SelectionDAGNodes.h
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
Constants.h
llvm::SelectionDAG::UpdateNodeOperands
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
Definition: SelectionDAG.cpp:9304
llvm::PPCFunctionInfo::setReturnAddrSaveIndex
void setReturnAddrSaveIndex(int idx)
Definition: PPCMachineFunctionInfo.h:165
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:707
llvm::PPCISD::GlobalBaseReg
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
Definition: PPCISelLowering.h:156
llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition: ISDOpcodes.h:421
llvm::PowerOf2Floor
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:722
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:763
llvm::PPCTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition: PPCISelLowering.cpp:16944
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition: SelectionDAG.cpp:1379
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3821
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:782
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:745
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:674
llvm::PPCTargetLowering::hasSPE
bool hasSPE() const
Definition: PPCISelLowering.cpp:1599
llvm::PPCSubtarget::getTOCPointerRegister
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:424
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1446
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7926
llvm::PPCISD::XSMINC
@ XSMINC
Definition: PPCISelLowering.h:56
llvm::PPCISD::LFIWZX
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit integer.
Definition: PPCISelLowering.h:534
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:148
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
Definition: MachineFunction.h:754
llvm::PPCFrameLowering
Definition: PPCFrameLowering.h:22
llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:926
llvm::PPC::MOF_RPlusR
@ MOF_RPlusR
Definition: PPCISelLowering.h:711
llvm::User
Definition: User.h:44
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:971
Intr
unsigned Intr
Definition: AMDGPUBaseInfo.cpp:2375
llvm::PPC::MOF_ZExt
@ MOF_ZExt
Definition: PPCISelLowering.h:701
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:781
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:458
llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:422
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2517
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:58
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1449
isSplat
static bool isSplat(ArrayRef< Value * > VL)
Definition: SLPVectorizer.cpp:260
llvm::PPCISD::XXSPLTI_SP_TO_DP
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates, for converting an immediate single precision value to double precision.
Definition: PPCISelLowering.h:109
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1842
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:308
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::GlobalObject
Definition: GlobalObject.h:27
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3415
StoreTailCallArgumentsToStackSlot
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
Definition: PPCISelLowering.cpp:4981
llvm::PPCTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Definition: PPCISelLowering.cpp:11225
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
BuildVSLDOI
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
Definition: PPCISelLowering.cpp:8991
MCContext.h
CalculateTailCallSPDiff
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
Definition: PPCISelLowering.cpp:4647
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::PPCISD::VCMP_rec
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
Definition: PPCISelLowering.h:285
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2514
llvm::PPCTargetLowering::CallFlags::CallConv
const CallingConv::ID CallConv
Definition: PPCISelLowering.h:1162
llvm::PPCSubtarget::isISA3_1
bool isISA3_1() const
Definition: PPCSubtarget.h:339
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
isXXBRShuffleMaskHelper
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
Definition: PPCISelLowering.cpp:2352
llvm::PPCTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Definition: PPCISelLowering.cpp:16285
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3809
llvm::PPCTargetLowering::CallFlags
Structure that collects some common arguments that get passed around between the functions for call l...
Definition: PPCISelLowering.h:1161
llvm::PPCFunctionInfo::setVarArgsNumFPR
void setVarArgsNumFPR(unsigned Num)
Definition: PPCMachineFunctionInfo.h:245
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
Param
Value * Param
Definition: NVPTXLowerArgs.cpp:164
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:769
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:329
PPCCCState.h
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:692
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:950
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:932
llvm::PPCFunctionInfo::setUsesTOCBasePtr
void setUsesTOCBasePtr()
Definition: PPCMachineFunctionInfo.h:218
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:232
llvm::PPCSubtarget::hasFPCVT
bool hasFPCVT() const
Definition: PPCSubtarget.h:275
llvm::PPCSubtarget::isISA3_0
bool isISA3_0() const
Definition: PPCSubtarget.h:338
llvm::PPCISD::ATOMIC_CMP_SWAP_8
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
Definition: PPCISelLowering.h:594
llvm::TargetLowering::CallLoweringInfo::setDebugLoc
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
Definition: TargetLowering.h:4059
llvm::PPCISD::VADD_SPLAT
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimize a BUILD_VECTOR into operations on splats.
Definition: PPCISelLowering.h:414
ANDIGlueBug
cl::opt< bool > ANDIGlueBug
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1015
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
callIntrinsic
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
Definition: PPCISelLowering.cpp:11334
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::ISD::DELETED_NODE
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2287
llvm::TypeSize::Fixed
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:441
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3374
llvm::PPCSubtarget::hasEFPU2
bool hasEFPU2() const
Definition: PPCSubtarget.h:278
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:654
llvm::PPCFunctionInfo::setHasNonRISpills
void setHasNonRISpills()
Definition: PPCMachineFunctionInfo.h:206
llvm::ISD::isUNINDEXEDLoad
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
Definition: SelectionDAGNodes.h:3056
llvm::TargetOptions::PPCGenScalarMASSEntries
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
Definition: TargetOptions.h:355
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:794
PPCFrameLowering.h
llvm::PPC::MOF_Vector
@ MOF_Vector
Definition: PPCISelLowering.h:720
llvm::PPCTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
Definition: PPCISelLowering.cpp:15986
llvm::PPCTargetLowering::getNegatedExpression
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
Definition: PPCISelLowering.cpp:16854
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:369
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::Instruction
Definition: Instruction.h:42
llvm::PPCTargetLowering::emitMaskedAtomicRMWIntrinsic
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:18145
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::DataLayout::getABITypeAlign
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:829
llvm::PPCSubtarget::descriptorTOCAnchorOffset
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:406
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1523
llvm::PPCTargetLowering::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
Definition: PPCISelLowering.cpp:16741
getEstimateRefinementSteps
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:13030
isSplatBV
static bool isSplatBV(SDValue Op)
Definition: PPCISelLowering.cpp:14759
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:773
llvm::PPCSubtarget::isPPC64
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
Definition: PPCSubtarget.cpp:255
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:737
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::PPC::PRED_BIT_SET
@ PRED_BIT_SET
Definition: PPCPredicates.h:57
llvm::PPCISD::SRA_ADDZE
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Definition: PPCISelLowering.h:180
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::PPCISD::MFVSR
@ MFVSR
Direct move from a VSX register to a GPR.
Definition: PPCISelLowering.h:220
llvm::CCValAssign::getCustomMem
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:114
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nullptr.
Definition: TargetLoweringBase.cpp:2005
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
APFloat.h
llvm::DataLayout::getLargestLegalIntTypeSizeInBits
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:864
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
PPC.h
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::ISD::FP16_TO_FP
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-precision numbers.
Definition: ISDOpcodes.h:896
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1163
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1765
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::PPCTargetLowering::SelectAddressPCRel
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
Definition: PPCISelLowering.cpp:2922
UseAbsoluteJumpTables
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
llvm::InlineAsm::Kind_Mem
@ Kind_Mem
Definition: InlineAsm.h:243
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7599
llvm::MVT::INVALID_SIMPLE_VALUE_TYPE
@ INVALID_SIMPLE_VALUE_TYPE
Definition: MachineValueType.h:38
DebugLoc.h
llvm::PPCISD::PPC32_GOT
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:320
SmallPtrSet.h
llvm::PPC::isVSLDOIShuffleMask
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
Definition: PPCISelLowering.cpp:2099
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:518
llvm::PPC::MOF_DoubleWordInt
@ MOF_DoubleWordInt
Definition: PPCISelLowering.h:718
llvm::PPCISD::XXSWAPD
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
Definition: PPCISelLowering.h:435
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3351
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
BuildIntrinsicOp
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
Definition: PPCISelLowering.cpp:8962
llvm::PPCTargetLowering::CallFlags::IsIndirect
const bool IsIndirect
Definition: PPCISelLowering.h:1166
llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
llvm::PPCSubtarget::hasPartwordAtomics
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:316
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:666
llvm::PPCISD::ADDIS_GOT_TPREL_HA
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
Definition: PPCISelLowering.h:329
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:153
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1430
llvm::ISD::TargetGlobalAddress
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::PPCSubtarget::getInstrInfo
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:217
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1989
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1751
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1130
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:960
llvm::PPC::PRED_LE
@ PRED_LE
Definition: PPCPredicates.h:28
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:149
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1269
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1668
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:651
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider type.
Definition: TargetLowering.h:2235
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:729
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1138
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type of the same bitwidth.
Definition: ValueTypes.h:93
llvm::CallBase::getCallingConv
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1455
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register, or null if none is found.
Definition: MachineRegisterInfo.cpp:396
llvm::array_lengthof
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLArrayExtras.h:29
MCSectionXCOFF.h
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
llvm::PPCSubtarget::hasFCPSGN
bool hasFCPSGN() const
Definition: PPCSubtarget.h:265
isSignExtended
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)
Definition: PPCISelLowering.cpp:11490
llvm::PPCISD::SRL
@ SRL
These nodes represent PPC shifts.
Definition: PPCISelLowering.h:165
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:514
llvm::None
const NoneType None
Definition: None.h:24
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:731
prepareIndirectCall
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
Definition: PPCISelLowering.cpp:5377
llvm::PPC::DIR_PWR_FUTURE
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
llvm::SelectionDAG::getEVTAlign
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
Definition: SelectionDAG.cpp:1222
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:97
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:182
convertIntToFP
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
Definition: PPCISelLowering.cpp:8366
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:143
llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition: TargetLowering.cpp:143
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:84
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::MachineInstr::NoFPExcept
@ NoFPExcept
Definition: MachineInstr.h:110
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::InlineAsm::getKind
static unsigned getKind(unsigned Flags)
Definition: InlineAsm.h:344
llvm::RISCVISD::DIVW
@ DIVW
Definition: RISCVISelLowering.h:69
llvm::PPCISD::MTVSRZ
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
Definition: PPCISelLowering.h:226
Type.h
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1431
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
BranchProbability.h
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::TargetLoweringObjectFile::getFunctionEntryPointSymbol
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
Definition: TargetLoweringObjectFile.h:282
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1436
llvm::PPCTargetLowering::SelectOptimalAddrMode
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Definition: PPCISelLowering.cpp:17953
llvm::PPCISD::LFIWAX
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
Definition: PPCISelLowering.h:529
llvm::PPCISD::STORE_VEC_BE
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:586
provablyDisjointOr
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
Definition: PPCISelLowering.cpp:2592
llvm::EVT::isExtended
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:134
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:698
llvm::MachineJumpTableInfo::EK_LabelDifference32
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
Definition: MachineJumpTableInfo.h:68
llvm::PPCSubtarget::isGVIndirectSymbol
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Definition: PPCSubtarget.cpp:245
llvm::PPCTargetLowering::getSDagStackGuard
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: PPCISelLowering.cpp:16938
llvm::PPCISD::FP_TO_SINT_IN_VSR
@ FP_TO_SINT_IN_VSR
Definition: PPCISelLowering.h:82
llvm::PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
Definition: PPCISelLowering.cpp:18169
llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition: MachineInstrBuilder.h:213
llvm::PPCTargetLowering::getPrefLoopAlignment
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Definition: PPCISelLowering.cpp:15935
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:348
llvm::PPCII::MO_GOT_TPREL_PCREL_FLAG
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:154
llvm::Instruction::hasAtomicLoad
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
Definition: Instruction.cpp:631
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:175
llvm::PPCISD::XXMFACC
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
Definition: PPCISelLowering.h:488
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::LoopBase::block_begin
block_iterator block_begin() const
Definition: LoopInfo.h:192
llvm::PPCISD::FCTIWZ
@ FCTIWZ
Definition: PPCISelLowering.h:73
llvm::CodeModel::Model
Model
Definition: CodeGen.h:28
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:918
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3392
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::PPCFunctionInfo::isLRStoreRequired
bool isLRStoreRequired() const
Definition: PPCMachineFunctionInfo.h:216
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:83
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
llvm::PPC::PRED_EQ
@ PRED_EQ
Definition: PPCPredicates.h:29
llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition: TargetLowering.h:4048
llvm::MachineFunction::getMMI
MachineModuleInfo & getMMI() const
Definition: MachineFunction.h:607
llvm::PPC::MOF_None
@ MOF_None
Definition: PPCISelLowering.h:697
llvm::TargetLowering::getPICJumpTableRelocBaseExpr
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: TargetLowering.cpp:468
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:476
llvm::TargetLowering::CallLoweringInfo::setLibCallee
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition: TargetLowering.h:4070
llvm::PPCTargetLowering::hasInlineStackProbe
bool hasInlineStackProbe(MachineFunction &MF) const override
Definition: PPCISelLowering.cpp:12003
llvm::PPCSubtarget::isAIXABI
bool isAIXABI() const
Definition: PPCSubtarget.h:367
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::PPCSubtarget::hasLDBRX
bool hasLDBRX() const
Definition: PPCSubtarget.h:299
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition: MachineInstrBuilder.h:152
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2304
llvm::PPCSubtarget::isSVR4ABI
bool isSVR4ABI() const
Definition: PPCSubtarget.h:368
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3353
llvm::PPCTargetLowering::SelectAddressRegRegOnly
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
Definition: PPCISelLowering.cpp:2884
llvm::PPCISD::CALL_NOP
@ CALL_NOP
Definition: PPCISelLowering.h:187
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:278
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:288
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:660
llvm::PPC::isXXBRHShuffleMask
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
Definition: PPCISelLowering.cpp:2365
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:628
llvm::PPCISD::ADD_TLS
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
Definition: PPCISelLowering.h:343
llvm::SPIRV::Decoration::Alignment
@ Alignment
llvm::cl::opt< bool >
llvm::APFloat
Definition: APFloat.h:701
llvm::SDNode::use_begin
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Definition: SelectionDAGNodes.h:788
llvm::NVPTX::PTXLdStInstCode::V4
@ V4
Definition: NVPTX.h:124
llvm::CCValAssign::LocInfo
LocInfo
Definition: CallingConvLower.h:33
llvm::ISD::Register
@ Register
Definition: ISDOpcodes.h:74
llvm::ISD::GET_DYNAMIC_AREA_OFFSET
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1240
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:240
llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition: SelectionDAGNodes.h:1130
llvm::PPCFrameLowering::getLinkageSize
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
Definition: PPCFrameLowering.h:165
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::PPCSubtarget::isTargetLinux
bool isTargetLinux() const
Definition: PPCSubtarget.h:365
llvm::peekThroughBitcasts
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Definition: SelectionDAG.cpp:10639
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:801
llvm::PPCTargetLowering::getPICJumpTableRelocBaseExpr
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
Definition: PPCISelLowering.cpp:3197
llvm::PPCTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition: PPCISelLowering.cpp:16810
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
llvm::PPCII::MO_TPREL_HA
@ MO_TPREL_HA
Definition: PPC.h:164
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:86
llvm::isInt< 32 >
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:921
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
AIXSSPCanaryWordName
static const char AIXSSPCanaryWordName[]
Definition: PPCISelLowering.cpp:148
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:35
llvm::isUInt< 16 >
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:733
findConsecutiveLoad
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13271
llvm::PPCII::MO_TLSGDM_FLAG
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:139
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1709
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::PPCISD::LXSIZX
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
Definition: PPCISelLowering.h:539
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:124
llvm::SelectionDAG::MaxRecursionDepth
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:428
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
uint64_t
llvm::PPC::MOF_ScalarFloat
@ MOF_ScalarFloat
Definition: PPCISelLowering.h:719
llvm::PPCISD::CALL_RM
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
Definition: PPCISelLowering.h:205
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:469
llvm::InlineAsm::Kind_Imm
@ Kind_Imm
Definition: InlineAsm.h:242
RuntimeLibcalls.h
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:820
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1618
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:937
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1346
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:636
llvm::TargetLowering::verifyReturnAddressArgumentIsConstant
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:6628
llvm::PPCSubtarget::hasP9Altivec
bool hasP9Altivec() const
Definition: PPCSubtarget.h:286
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:966
llvm::PPCISD::MFFS
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
Definition: PPCISelLowering.h:305
llvm::PPCISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: PPCISelLowering.h:48
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition: TargetLowering.h:4021
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1572
llvm::PPC::isVMRGHShuffleMask
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
Definition: PPCISelLowering.cpp:1980
llvm::PPCTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
Definition: PPCISelLowering.cpp:16209
llvm::SPIRV::Opcode
Opcode
Definition: SPIRVBaseInfo.h:718
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::PPC::MOF_SExt
@ MOF_SExt
Definition: PPCISelLowering.h:700
llvm::PPCISD::CALL_NOTOC_RM
@ CALL_NOTOC_RM
Definition: PPCISelLowering.h:207
llvm::PPC::DIR_PWR5X
@ DIR_PWR5X
Definition: PPCSubtarget.h:57
llvm::TargetLoweringBase::NegatibleCost::Expensive
@ Expensive
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1133
llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:39
llvm::TargetLowering::getJumpTableEncoding
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Definition: TargetLowering.cpp:440
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::PPCSubtarget::is32BitELFABI
bool is32BitELFABI() const
Definition: PPCSubtarget.h:372
llvm::PPCSubtarget::needsSwapsForVSXMemOps
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:355
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
prepareDescriptorIndirectCall
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5388
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::TargetRegisterInfo::getMatchingSuperReg
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
Definition: TargetRegisterInfo.h:588
llvm::DenseMap
Definition: DenseMap.h:716
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:776
MCSymbolXCOFF.h
llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition: TargetLowering.h:4042
isAlternatingShuffMask
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
Definition: PPCISelLowering.cpp:14745
llvm::ISD::OutputArg
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
Definition: TargetCallingConv.h:233
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2253
PPCInstrInfo.h
llvm::GlobalValue::hasComdat
bool hasComdat() const
Definition: GlobalValue.h:227
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:909
llvm::PPCSubtarget::hasSTFIWX
bool hasSTFIWX() const
Definition: PPCSubtarget.h:272
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4450
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:933
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1776
llvm::PPC::AddrMode
AddrMode
Definition: PPCISelLowering.h:731
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8908
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3820
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:483
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition: TargetLowering.h:2456
llvm::ISD::FP_TO_FP16
@ FP_TO_FP16
Definition: ISDOpcodes.h:897
combineBVOfConsecutiveLoads
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
Definition: PPCISelLowering.cpp:14095
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition: FloatingPointMode.h:69
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::PPCTargetLowering::getRegisterByName
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
Definition: PPCISelLowering.cpp:16398
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1170
MCRegisterInfo.h
llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition: TargetLowering.h:4457
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:341
llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition: TargetLowering.h:4046
llvm::PPCTargetLowering::enableAggressiveFMAFusion
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Definition: PPCISelLowering.cpp:1786
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2339
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:739
llvm::PPCFunctionInfo::getVarArgsNumFPR
unsigned getVarArgsNumFPR() const
Definition: PPCMachineFunctionInfo.h:244
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:439
llvm::SDNode::dump
void dump() const
Dump this node, for debugging.
Definition: SelectionDAGDumper.cpp:549
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1367
ArrayRef.h
llvm::TargetLoweringBase::NegatibleCost
NegatibleCost
Enum that specifies when a float negation is beneficial.
Definition: TargetLowering.h:276
llvm::PPCISD::SRA
@ SRA
Definition: PPCISelLowering.h:166
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1361
llvm::CCValAssign::getMem
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:100
DisableInnermostLoopAlign32
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
llvm::PPCSubtarget::isPredictableSelectIsExpensive
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:436
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:1752
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:163
llvm::TargetLoweringBase::hasBigEndianPartOrdering
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
Definition: TargetLowering.h:1616
llvm::APInt::getBoolValue
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:452
llvm::PPCISD::Hi
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Definition: PPCISelLowering.h:135
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:130
llvm::PPC::PRED_NE
@ PRED_NE
Definition: PPCPredicates.h:32
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:869
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:565
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2352
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:631
llvm::PPCII::MO_PIC_FLAG
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:109
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:118
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1428
llvm::APFloatBase::PPCDoubleDouble
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:185
llvm::PPCISD::PROBED_ALLOCA
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
Definition: PPCISelLowering.h:153
llvm::PPCTargetLowering::getStackProbeSize
unsigned getStackProbeSize(MachineFunction &MF) const
Definition: PPCISelLowering.cpp:12011
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:4020
llvm::PPCFunctionInfo::setVarArgsStackOffset
void setVarArgsStackOffset(int Offset)
Definition: PPCMachineFunctionInfo.h:228
IRBuilder.h
llvm::PPCISD::EXTRACT_SPE
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
Definition: PPCISelLowering.h:238
llvm::ISD::ArgFlagsTy::isNest
bool isNest() const
Definition: TargetCallingConv.h:118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1878
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
llvm::PPCISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
Definition: PPCISelLowering.h:273
DisableSCO
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::ISD::ADJUST_TRAMPOLINE
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1130
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:638
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::X86AS::FS
@ FS
Definition: X86.h:196
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:883
llvm::TargetLoweringBase::ArgListEntry
Definition: TargetLowering.h:282
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::LoopBase::getLoopDepth
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:672
llvm::PPCTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Definition: PPCISelLowering.cpp:16656
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1438
llvm::CCValAssign::getCustomReg
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:91
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1584
llvm::PPC::DIR_E5500
@ DIR_E5500
Definition: PPCSubtarget.h:53
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:234
llvm::ISD::STACKMAP
@ STACKMAP
Definition: ISDOpcodes.h:1293
llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:225
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:202
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1120
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1424
llvm::PPCISD::BUILD_FP128
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
Definition: PPCISelLowering.h:229
llvm::ISD::EH_DWARF_CFA
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9652
llvm::PPCTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
Definition: PPCISelLowering.cpp:1778
MachineModuleInfo.h
llvm::PPCII::MO_TLS
@ MO_TLS
Definition: PPC.h:173
llvm::ISD::TargetGlobalTLSAddress
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2164
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:3032
llvm::PPCISD::EXTRACT_VSX_REG
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
Definition: PPCISelLowering.h:485
combineBVOfVecSExt
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:14224
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
isConsecutiveLSLoc
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13154
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition: MachineInstrBuilder.h:123
llvm::RISCVISD::SRAW
@ SRAW
Definition: RISCVISelLowering.h:64
llvm::PPCTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition: PPCISelLowering.cpp:16805
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::PPCISD::DYNAREAOFFSET
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
Definition: PPCISelLowering.h:149
RA
SI optimize exec mask operations pre RA
Definition: SIOptimizeExecMaskingPreRA.cpp:71
llvm::TargetLowering::CW_Memory
@ CW_Memory
Definition: TargetLowering.h:4451
R6
#define R6(n)
llvm::MVT::v256i1
@ v256i1
Definition: MachineValueType.h:74
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2373
llvm::PPCISD::XSMAXC
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
Definition: PPCISelLowering.h:55
llvm::PPC::DIR_PWR8
@ DIR_PWR8
Definition: PPCSubtarget.h:61
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::SelectionDAG::CreateStackTemporary
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition: SelectionDAG.cpp:2282
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:750
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1087
llvm::PPCTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: PPCISelLowering.cpp:16444
llvm::PPC::isVMRGEOShuffleMask
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
Definition: PPCISelLowering.cpp:2070
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1322
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:79
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
info
lazy value info
Definition: LazyValueInfo.cpp:58
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::SelectionDAG::getTokenFactor
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
Definition: SelectionDAG.cpp:11799
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4453
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::ISD::ArgFlagsTy::setByValSize
void setByValSize(unsigned S)
Definition: TargetCallingConv.h:173
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:955
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
llvm::BranchProbability::getOne
static BranchProbability getOne()
Definition: BranchProbability.h:50
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:677
llvm::Type::FP128TyID
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
llvm::isIntS16Immediate
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
Definition: PPCISelLowering.cpp:2573
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::PPCSubtarget::hasDirectMove
bool hasDirectMove() const
Definition: PPCSubtarget.h:318
llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition: TargetLowering.h:4050
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
llvm::RetCC_PPC
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
TargetOptions.h
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::CCState::isVarArg
bool isVarArg() const
Definition: CallingConvLower.h:258
llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition: CallingConvLower.h:140
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1310
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:471
llvm::BlockAddress
The address of a basic block.
Definition: Constants.h:849
llvm::ISD::TargetConstantPool
@ TargetConstantPool
Definition: ISDOpcodes.h:168
llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition: MachineInstrBuilder.h:197
llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:4045
llvm::BuildVectorSDNode::isConstant
bool isConstant() const
Definition: SelectionDAG.cpp:11684
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:714
llvm::PPCSubtarget::hasFRE
bool hasFRE() const
Definition: PPCSubtarget.h:267
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1478
llvm::PPCISD::SCALAR_TO_VECTOR_PERMUTED
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
Definition: PPCISelLowering.h:256
llvm::ISD::isSEXTLoad
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
Definition: SelectionDAGNodes.h:3044
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::PPCISD::STRICT_FCFIDU
@ STRICT_FCFIDU
Definition: PPCISelLowering.h:498
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:154
llvm::PPCTargetLowering::emitEHSjLjLongJmp
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:11902
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:3025
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:663
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1373
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1863
None.h
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1110
llvm::BlockAddressSDNode::getBlockAddress
const BlockAddress * getBlockAddress() const
Definition: SelectionDAGNodes.h:2188
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition: CallingConvLower.h:349
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:429
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:11394
llvm::Reloc::PIC_
@ PIC_
Definition: CodeGen.h:22
llvm::SDNode::use_end
static use_iterator use_end()
Definition: SelectionDAGNodes.h:792
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:107
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
isLoad
static bool isLoad(int Opcode)
Definition: ARCInstrInfo.cpp:53
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:8758
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:5148
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:352
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:982
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1968
llvm::SDNodeFlags::setNoFPExcept
void setNoFPExcept(bool b)
Definition: SelectionDAGNodes.h:421
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:415
llvm::PPCISD::VCMP
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
Definition: PPCISelLowering.h:279
llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition: SelectionDAGNodes.h:1528
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:101
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:448
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::PPCISD::STRICT_FADDRTZ
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
Definition: PPCISelLowering.h:503
llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition: TargetLowering.h:3822
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:934
llvm::PPCISD::STRICT_FCFIDS
@ STRICT_FCFIDS
Definition: PPCISelLowering.h:499
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::PPCSubtarget::hasFPU
bool hasFPU() const
Definition: PPCSubtarget.h:279
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:10240
llvm::ISD::OutputArg::Flags
ArgFlagsTy Flags
Definition: TargetCallingConv.h:234
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:161
llvm::PPCTargetLowering::SelectAddressEVXRegReg
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
Definition: PPCISelLowering.cpp:2604
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2515
PPCPerfectShuffle.h
llvm::TargetRegisterInfo::isTypeLegalForClass
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
Definition: TargetRegisterInfo.h:294
llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition: ISDOpcodes.h:401
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
Compiler.h
llvm::TargetLoweringBase::IsStrictFPEnabled
bool IsStrictFPEnabled
Definition: TargetLowering.h:3407
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3388
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1134
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
llvm::PPCTargetLowering::SelectAddressRegImm34
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
Definition: PPCISelLowering.cpp:2835
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::PPCTargetLowering::CallFlags::IsTailCall
const bool IsTailCall
Definition: PPCISelLowering.h:1163
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetLowering::getCheaperNegatedExpression
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
Definition: TargetLowering.h:3960
llvm::PPCSubtarget::has64BitSupport
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:244
llvm::TargetLowering::isGAPlusOffset
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
Definition: TargetLowering.cpp:5045
llvm::PPCISD::FCFIDUS
@ FCFIDUS
Definition: PPCISelLowering.h:67
combineADDToADDZE
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:17048
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1585
llvm::SDNodeFlags::hasNoInfs
bool hasNoInfs() const
Definition: SelectionDAGNodes.h:428
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:922
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1170
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:5225
isNByteElemShuffleMask
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
Definition: PPCISelLowering.cpp:2188
llvm::TargetMachine::shouldAssumeDSOLocal
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
Definition: TargetMachine.cpp:88
hasSameArgumentList
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
Definition: PPCISelLowering.cpp:4780
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2343
isValidSplatLoad
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
Definition: PPCISelLowering.cpp:9128
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:109
llvm::SDNode::ops
ArrayRef< SDUse > ops() const
Definition: SelectionDAGNodes.h:918
llvm::PPCFunctionInfo::getVarArgsStackOffset
int getVarArgsStackOffset() const
Definition: PPCMachineFunctionInfo.h:227
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:923
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:469
llvm::TargetLoweringBase::setMinStackArgumentAlignment
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
Definition: TargetLowering.h:2469
isScalarToVec
static SDValue isScalarToVec(SDValue Op)
Definition: PPCISelLowering.cpp:14778
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::PPCISD::PPC32_PICGOT
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
Definition: PPCISelLowering.h:324
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:392
llvm::PPCISD::LXVRZX
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
Definition: PPCISelLowering.h:555
setUsesTOCBasePtr
static void setUsesTOCBasePtr(MachineFunction &MF)
Definition: PPCISelLowering.cpp:3101
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:138
llvm::MVT::v512i1
@ v512i1
Definition: MachineValueType.h:75
llvm::TargetLowering::CallLoweringInfo::setIsPostTypeLegalization
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
Definition: TargetLowering.h:4169
llvm::isIndirectCall
static bool isIndirectCall(const MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:654
llvm::MVT::v1i128
@ v1i128
Definition: MachineValueType.h:134
generateEquivalentSub
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
Definition: PPCISelLowering.cpp:13338
llvm::SignExtend64
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:817
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::ISD::InputArg::Flags
ArgFlagsTy Flags
Definition: TargetCallingConv.h:196
llvm::PPCISD::LD_VSX_LH
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
Definition: PPCISelLowering.h:564
llvm::GlobalValue::isStrongDefinitionForLinker
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:611
llvm::PPC::getSplatIdxForPPCMnemonics
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
Definition: PPCISelLowering.cpp:2445
llvm::PPCTargetLowering::isJumpTableRelative
bool isJumpTableRelative() const override
Definition: PPCISelLowering.cpp:3173
LowerMemOpCallTo
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
Definition: PPCISelLowering.cpp:5070
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:805
llvm::PPCISD::PADDI_DTPREL
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
Definition: PPCISelLowering.h:408
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1628
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1357
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:715
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::PPCTargetLowering::EmitAtomicBinary
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
Definition: PPCISelLowering.cpp:11372
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4432
llvm::PPC::AM_XForm
@ AM_XForm
Definition: PPCISelLowering.h:737
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::RetCC_PPC_Cold
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:668
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:312
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:664
llvm::PPCTargetLowering::getScratchRegisters
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Definition: PPCISelLowering.cpp:16793
llvm::PPCISD::LXVD2X
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:549
llvm::PPCSubtarget::hasInvariantFunctionDescriptors
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:311
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2343
llvm::PPCISD::ADDIS_TLSGD_HA
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
Definition: PPCISelLowering.h:348
llvm::PPCISD::CALL_NOP_RM
@ CALL_NOP_RM
Definition: PPCISelLowering.h:206
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:708
llvm::PPCISD::ADDI_DTPREL_L
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
Definition: PPCISelLowering.h:404
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:973
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1429
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::PPC::isVPKUDUMShuffleMask
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
Definition: PPCISelLowering.cpp:1888
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::EVT::getEVTString
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:152
llvm::APInt::bitsToDouble
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1620
llvm::PPCISD::FCTIWUZ
@ FCTIWUZ
Definition: PPCISelLowering.h:78
llvm::PPCTargetLowering::isFPExtFree
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
Definition: PPCISelLowering.cpp:16639
CallingConv.h
llvm::PPCISD::ADDI_TLSLD_L_ADDR
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
Definition: PPCISelLowering.h:394
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:347
getLabelAccessInfo
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
Definition: PPCISelLowering.cpp:3069
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::StringRef::size
constexpr LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition: TargetLowering.h:4036
llvm::SDNode::isOnlyUserOf
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition: SelectionDAG.cpp:10882
j
return j(j<< 16)
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:420
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1447
llvm::PPCISD::FSEL
@ FSEL
FSEL - Traditional three-operand fsel node.
Definition: PPCISelLowering.h:52
llvm::DataLayout::getIntPtrType
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:842
llvm::PPCISD::RET_FLAG
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
Definition: PPCISelLowering.h:212
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:10624
llvm::PPCISD::TOC_ENTRY
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
Definition: PPCISelLowering.h:600
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2516
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:334
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:144
llvm::PPCTargetLowering::CallFlags::IsVarArg
const bool IsVarArg
Definition: PPCISelLowering.h:1164
setXFormForUnalignedFI
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
Definition: PPCISelLowering.cpp:17941
llvm::PPC::isXXBRQShuffleMask
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
Definition: PPCISelLowering.cpp:2377
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:1030
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition: ISDOpcodes.h:402
PPCCallingConv.h
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::PPCFunctionInfo::FixedType
@ FixedType
Definition: PPCMachineFunctionInfo.h:27
getMaxByValAlign
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
Definition: PPCISelLowering.cpp:1556
llvm::PPCISD::STRICT_FCTIWUZ
@ STRICT_FCTIWUZ
Definition: PPCISelLowering.h:494
llvm::PPC::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
Definition: PPCFastISel.cpp:2467
llvm::PPCTargetLowering::SelectAddressRegImm
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
Definition: PPCISelLowering.cpp:2730
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2890
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2321
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1739
llvm::KnownBits
Definition: KnownBits.h:23
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
EnableQuadwordAtomics
static cl::opt< bool > EnableQuadwordAtomics("ppc-quadword-atomics", cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden)
llvm::PPCISD::BCTRL_LOAD_TOC_RM
@ BCTRL_LOAD_TOC_RM
Definition: PPCISelLowering.h:209
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition: SelectionDAGNodes.h:896
llvm::AIXCCState::isFixed
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:249
llvm::ISD::isEXTLoad
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
Definition: SelectionDAGNodes.h:3038
llvm::PPCSubtarget::useSoftFloat
bool useSoftFloat() const
Definition: PPCSubtarget.h:246
uint16_t
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2518
llvm::PPCISD::STBRX
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
Definition: PPCISelLowering.h:514
CallingConvLower.h
rotate
The same transformation can work with an even modulo with the addition of a rotate
Definition: README.txt:680
llvm::TargetLowering::CallLoweringInfo::setZExtResult
CallLoweringInfo & setZExtResult(bool Value=true)
Definition: TargetLowering.h:4154
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:10614
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:295
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::PPCTargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
Definition: PPCISelLowering.cpp:16920
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:982
llvm::ISD::TargetExternalSymbol
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
getPPCStrictOpcode
static unsigned getPPCStrictOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:8021
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::object::BCTR
@ BCTR
Definition: ELF.h:91
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:916
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:531
llvm::MachineMemOperand::getSize
uint64_t getSize() const
Return the size in bytes of the memory reference.
Definition: MachineMemOperand.h:235
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4440
llvm::PPCISD::FCTIDUZ
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
Definition: PPCISelLowering.h:77
getSToVPermuted
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:14815
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:915
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:452
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:98
llvm::PPCISD::ADDIS_DTPREL_HA
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
Definition: PPCISelLowering.h:399
ISDOpcodes.h
Success
#define Success
Definition: AArch64Disassembler.cpp:280
isBLACompatibleAddress
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
Definition: PPCISelLowering.cpp:4952
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:735
Enabled
static bool Enabled
Definition: Statistic.cpp:46
llvm::PPCISD::ANDI_rec_1_EQ_BIT
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
Definition: PPCISelLowering.h:262
llvm::APInt::isNegatedPowerOf2
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:434
llvm::AIXCCState
Definition: PPCCCState.h:41
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ISD::INLINEASM_BR
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1028
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:514
Casting.h
llvm::PPCFunctionInfo::getVarArgsNumGPR
unsigned getVarArgsNumGPR() const
Definition: PPCMachineFunctionInfo.h:230
llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition: ISDOpcodes.h:425
llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition: TargetCallingConv.h:76
llvm::PPCTargetLowering::BuildSDIVPow2
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Definition: PPCISelLowering.cpp:15847
Function.h
llvm::CCState::AllocateStack
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
Definition: CallingConvLower.h:423
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::PPCFrameLowering::getFramePointerSaveOffset
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
Definition: PPCFrameLowering.cpp:2682
llvm::PPCTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Definition: PPCISelLowering.cpp:16652
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
void AddToWorklist(SDNode *N)
Definition: DAGCombiner.cpp:833
llvm::PPC::MOF_SubWordInt
@ MOF_SubWordInt
Definition: PPCISelLowering.h:716
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1829
llvm::SelectionDAG::getMCSymbol
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
Definition: SelectionDAG.cpp:1820
llvm::PPCISD::SWAP_NO_CHAIN
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
Definition: PPCISelLowering.h:439
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:262
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1434
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::PPCISD::NodeType
NodeType
Definition: PPCISelLowering.h:46
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
usePartialVectorLoads
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
Definition: PPCISelLowering.cpp:2937
llvm::PPC::isVPKUWUMShuffleMask
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
Definition: PPCISelLowering.cpp:1851
llvm::PPCFunctionInfo::VectorFloat
@ VectorFloat
Definition: PPCMachineFunctionInfo.h:33
llvm::SDNode::getNumValues
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Definition: SelectionDAGNodes.h:968
llvm::PPCSubtarget::hasSPE
bool hasSPE() const
Definition: PPCSubtarget.h:277
llvm::TargetLoweringBase::shouldExpandAtomicCmpXchgInIR
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: TargetLowering.h:2082
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
llvm::TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic
@ MaskedIntrinsic
llvm::PPCISD::CALL
@ CALL
CALL - A direct function call.
Definition: PPCISelLowering.h:186
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:232
StringSwitch.h
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:371
llvm::PPCISD::CR6UNSET
@ CR6UNSET
Definition: PPCISelLowering.h:316
llvm::PPCTargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: PPCISelLowering.cpp:16928
llvm::TargetLoweringBase::getSchedulingPreference
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
Definition: TargetLowering.h:881
llvm::PPCISD::BCTRL_LOAD_TOC
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
Definition: PPCISelLowering.h:201
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4442
llvm::PPC::DIR_PWR7
@ DIR_PWR7
Definition: PPCSubtarget.h:60
transformCallee
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5283
llvm::PPC::MOF_RPlusSImm16Mult4
@ MOF_RPlusSImm16Mult4
Definition: PPCISelLowering.h:708
llvm::PPCISD::BCTRL_RM
@ BCTRL_RM
Definition: PPCISelLowering.h:208
combineADDToMAT_PCREL_ADDR
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:17134
llvm::TargetLoweringBase::setJumpIsExpensive
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
Definition: TargetLoweringBase.cpp:942
llvm::PPC::DIR_PWR5
@ DIR_PWR5
Definition: PPCSubtarget.h:56
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::PPCII::MO_HA
@ MO_HA
Definition: PPC.h:161
llvm::PPCISD::LOAD_VEC_BE
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
Definition: PPCISelLowering.h:560
PPCISelLowering.h
llvm::PPCTargetMachine
Common code between 32-bit and 64-bit PowerPC targets.
Definition: PPCTargetMachine.h:25
llvm::SmallSet::end
const_iterator end() const
Definition: SmallSet.h:231
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:3074
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1082
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1389
llvm::TargetRegisterInfo::getNoPreservedMask
virtual const uint32_t * getNoPreservedMask() const
Return a register mask that clobbers everything.
Definition: TargetRegisterInfo.h:494
llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition: TargetLowering.h:4032
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:741
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
llvm::PPC::PRED_UN
@ PRED_UN
Definition: PPCPredicates.h:33
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:751
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::PPCISD::ADDIS_TLSLD_HA
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
Definition: PPCISelLowering.h:378
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:774
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1162
llvm::TargetLoweringBase::setHasMultipleConditionRegisters
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
Definition: TargetLowering.h:2262
llvm::codeview::ModifierOptions::Const
@ Const
llvm::PPC::MOF_WordInt
@ MOF_WordInt
Definition: PPCISelLowering.h:717
llvm::PPCISD::ADDI_TLSGD_L
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
Definition: PPCISelLowering.h:354
llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:426
llvm::checkConvertToNonDenormSingle
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
Definition: PPCISelLowering.cpp:9117
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
llvm::APInt::abs
APInt abs() const
Get the absolute value.
Definition: APInt.h:1686
llvm::PPCTargetLowering::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Definition: PPCISelLowering.cpp:16603
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::FPOpFusion::Fast
@ Fast
Definition: TargetOptions.h:37
CodeGen.h
llvm::PPCISD::EXTSWSLI
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
Definition: PPCISelLowering.h:174
callsShareTOCBase
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
Definition: PPCISelLowering.cpp:4664
getCanonicalConstSplat
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
Definition: PPCISelLowering.cpp:8940
llvm::PPCISD::TLSGD_AIX
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
Definition: PPCISelLowering.h:373
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:45
getBaseWithConstantOffset
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13142
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:300
llvm::MachineFunction::getPICBaseSymbol
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
Definition: MachineFunction.cpp:717
llvm::PPC::MOF_RPlusSImm16
@ MOF_RPlusSImm16
Definition: PPCISelLowering.h:706
llvm::SDNode::hasNUsesOfValue
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Definition: SelectionDAG.cpp:10853
llvm::PPC::DIR_PWR6
@ DIR_PWR6
Definition: PPCSubtarget.h:58
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1278
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
llvm::PPCISD::MFOCRF
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
Definition: PPCISelLowering.h:217
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2221
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1429
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:916
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:871
llvm::PPC::isVMRGLShuffleMask
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
Definition: PPCISelLowering.cpp:1955
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:175
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
llvm::PPCISD::VECINSERT
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
Definition: PPCISelLowering.h:117
getOutputChainFromCallSeq
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
Definition: PPCISelLowering.cpp:5361
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::LSBaseSDNode
Base class for LoadSDNode and StoreSDNode.
Definition: SelectionDAGNodes.h:2291
llvm::PPCISD::CALL_NOTOC
@ CALL_NOTOC
Definition: PPCISelLowering.h:188
Instructions.h
llvm::PPCSubtarget::isUsingPCRelativeCalls
bool isUsingPCRelativeCalls() const
Definition: PPCSubtarget.cpp:257
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4843
llvm::PPCTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition: PPCISelLowering.cpp:11352
llvm::CC_PPC32_SVR4
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:391
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::MVT::f128
@ f128
Definition: MachineValueType.h:60
combineBVZEXTLOAD
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:14322
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1146
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:691
llvm::GlobalAlias::getAliaseeObject
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:551
truncateScalarIntegerArg
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
Definition: PPCISelLowering.cpp:6839
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1113
PrepareTailCall
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
Definition: PPCISelLowering.cpp:5094
llvm::PPCISD::SINT_VEC_TO_FP
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
Definition: PPCISelLowering.h:244
llvm::PPCSubtarget::hasAltivec
bool hasAltivec() const
Definition: PPCSubtarget.h:276
SmallVector.h
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:394
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1006
llvm::PPC::AM_PCRel
@ AM_PCRel
Definition: PPCISelLowering.h:738
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:278
llvm::PPCISD::LD_SPLAT
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
Definition: PPCISelLowering.h:568
MachineInstrBuilder.h
isGPRShadowAligned
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
Definition: PPCISelLowering.cpp:6551
addShuffleForVecExtend
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
Definition: PPCISelLowering.cpp:14185
llvm::PPCFunctionInfo::LongFloatingPoint
@ LongFloatingPoint
Definition: PPCMachineFunctionInfo.h:29
llvm::MCSymbolXCOFF
Definition: MCSymbolXCOFF.h:20
llvm::InlineAsm::getNumOperandRegisters
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:355
llvm::PPCISD::SHL
@ SHL
Definition: PPCISelLowering.h:167
llvm::PPCISD::VEXTS
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
Definition: PPCISelLowering.h:86
llvm::ISD::isUnsignedIntSetCC
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1463
llvm::ISD::ArgFlagsTy::getNonZeroByValAlign
Align getNonZeroByValAlign() const
Definition: TargetCallingConv.h:153
llvm::CCValAssign::getReg
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
Definition: CallingConvLower.h:77
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::PPCFunctionInfo::VectorShort
@ VectorShort
Definition: PPCMachineFunctionInfo.h:31
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::ISD::ZERO_EXTEND_VECTOR_INREG
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:814
llvm::PPCSubtarget::getEnvironmentPointerRegister
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:418
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:141
llvm::PPCSubtarget::descriptorEnvironmentPointerOffset
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:412
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:56
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::TargetLowering::useLoadStackGuardNode
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: TargetLowering.h:4936
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1277
llvm::MCSectionXCOFF::getQualNameSymbol
MCSymbolXCOFF * getQualNameSymbol() const
Definition: MCSectionXCOFF.h:110
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition: TargetLowering.h:2477
CreateCopyOfByValArgument
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Definition: PPCISelLowering.cpp:5059
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::PPC::DIR_E500
@ DIR_E500
Definition: PPCSubtarget.h:51
llvm::PPCTargetLowering::decomposeMulByConstant
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
Definition: PPCISelLowering.cpp:16694
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:693
llvm::PPCTargetLowering::SelectForceXFormMode
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Definition: PPCISelLowering.cpp:17775
llvm::PPCTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: PPCISelLowering.cpp:12198
isFloatingPointZero
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
Definition: PPCISelLowering.cpp:1796
llvm::PPCTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition: PPCISelLowering.cpp:16619
llvm::PPCISD::PAIR_BUILD
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
Definition: PPCISelLowering.h:479
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:2076
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::ISD::ArgFlagsTy::isInConsecutiveRegs
bool isInConsecutiveRegs() const
Definition: TargetCallingConv.h:124
llvm::PPCTargetLowering::useSoftFloat
bool useSoftFloat() const override
Definition: PPCISelLowering.cpp:1595
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:838
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2404
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:894
llvm::PPCISD::FRE
@ FRE
Reciprocal estimate instructions (unary FP ops).
Definition: PPCISelLowering.h:89
PPCMachineFunctionInfo.h
llvm::ISD::TargetJumpTable
@ TargetJumpTable
Definition: ISDOpcodes.h:167
llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:285
llvm::PPCISD::ST_VSR_SCAL_INT
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
Definition: PPCISelLowering.h:589
llvm::PPC::DIR_PWR10
@ DIR_PWR10
Definition: PPCSubtarget.h:63
llvm::PPCTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: PPCISelLowering.cpp:18116
llvm::PPCISD::XXPERMDI
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
Definition: PPCISelLowering.h:125
PPC
should just be implemented with a CLZ instruction Since there are other e PPC
Definition: README.txt:709
isPCRelNode
static bool isPCRelNode(SDValue N)
Definition: PPCISelLowering.cpp:17648
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:220
llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition: MachineValueType.h:368
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2514
llvm::InlineAsm::Kind_RegDef
@ Kind_RegDef
Definition: InlineAsm.h:239
llvm::PPCFunctionInfo::getFramePointerSaveIndex
int getFramePointerSaveIndex() const
Definition: PPCMachineFunctionInfo.h:161
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2761
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::KnownBits::isConstant
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
llvm::ISD::isZEXTLoad
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
Definition: SelectionDAGNodes.h:3050
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: PPCISelLowering.cpp:9467
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:662
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:160
widenVec
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
Definition: PPCISelLowering.cpp:8412
llvm::PPC::MOF_NoExt
@ MOF_NoExt
Definition: PPCISelLowering.h:702
MachineMemOperand.h
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:637
llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:163
llvm::PPCISD::GET_TLS_ADDR
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:359
llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition: TargetLowering.h:4049
MachineOperand.h
RegName
#define RegName(no)
llvm::PPCISD::BCTRL
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
Definition: PPCISelLowering.h:196
llvm::PPCTargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Definition: PPCISelLowering.cpp:16836
llvm::PPCFunctionInfo::ShortFloatingPoint
@ ShortFloatingPoint
Definition: PPCMachineFunctionInfo.h:28
isFPExtLoad
static bool isFPExtLoad(SDValue Op)
Definition: PPCISelLowering.cpp:13994
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:931
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:661
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:886
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1122
llvm::PPCFunctionInfo::setLRStoreRequired
void setLRStoreRequired()
Definition: PPCMachineFunctionInfo.h:215
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:652
DerivedTypes.h
invertFMAOpcode
static unsigned invertFMAOpcode(unsigned Opc)
Definition: PPCISelLowering.cpp:16843
llvm::PPC::MOF_AddrIsSImm32
@ MOF_AddrIsSImm32
Definition: PPCISelLowering.h:713
llvm::PPCFunctionInfo::setFramePointerSaveIndex
void setFramePointerSaveIndex(int Idx)
Definition: PPCMachineFunctionInfo.h:162
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
isTOCSaveRestoreRequired
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5217
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1432
llvm::PPCSubtarget::POPCNTD_Fast
@ POPCNTD_Fast
Definition: PPCSubtarget.h:76
llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition: TargetLowering.h:4043
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:256
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
llvm::PPCFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition: PPCMachineFunctionInfo.h:225
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:47
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::PPCSubtarget::useLongCalls
bool useLongCalls() const
Definition: PPCSubtarget.h:341
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
getRegClassForSVT
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
Definition: PPCISelLowering.cpp:6810
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:911
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:449
llvm::SelectionDAG::ComputeNumSignBits
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: SelectionDAG.cpp:3914
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PPCISD::CMPB
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
Definition: PPCISelLowering.h:128
llvm::LoadInst::isUnordered
bool isUnordered() const
Definition: Instructions.h:255
llvm::PPCISD::GET_TLSLD_ADDR
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
Definition: PPCISelLowering.h:389
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.h:225
llvm::PPCISD::ATOMIC_CMP_SWAP_16
@ ATOMIC_CMP_SWAP_16
Definition: PPCISelLowering.h:595
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::PPCSubtarget::getCPUDirective
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:206
llvm::PPCSubtarget::hasQuadwordAtomics
bool hasQuadwordAtomics() const
Definition: PPCSubtarget.h:317
llvm::PPCSubtarget::allowsUnalignedFPAccess
bool allowsUnalignedFPAccess() const
Definition: PPCSubtarget.h:308
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1077
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::InlineAsm::Kind_RegUse
@ Kind_RegUse
Definition: InlineAsm.h:238
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1812
llvm::PPCISD::FP_TO_UINT_IN_VSR
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
Definition: PPCISelLowering.h:81
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition: TargetLowering.h:3372
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:371
llvm::Sched::Preference
Preference
Definition: TargetLowering.h:97
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition: SelectionDAGNodes.h:3063
llvm::PPCII::MO_TPREL_FLAG
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition: PPC.h:131
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::PPC::DIR_440
@ DIR_440
Definition: PPCSubtarget.h:43
llvm::NVPTX::VecShuffle
@ VecShuffle
Definition: NVPTX.h:88
llvm::cl::desc
Definition: CommandLine.h:405
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1467
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1462
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:370
llvm::PPC::get_VSPLTI_elt
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
Definition: PPCISelLowering.cpp:2465
llvm::PPCTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition: PPCISelLowering.cpp:16076
needStackSlotPassParameters
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
Definition: PPCISelLowering.cpp:4744
llvm::PPC::MOF_SubtargetBeforeP9
@ MOF_SubtargetBeforeP9
Definition: PPCISelLowering.h:724
llvm::PPCSubtarget::hasLFIWAX
bool hasLFIWAX() const
Definition: PPCSubtarget.h:273
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:760
raw_ostream.h
llvm::PPCII::MO_TPREL_LO
@ MO_TPREL_LO
Definition: PPC.h:163
llvm::PPCISD::STRICT_FCFID
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
Definition: PPCISelLowering.h:497
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:241
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:85
llvm::PPCISD::FTSQRT
@ FTSQRT
Test instruction for software square root.
Definition: PPCISelLowering.h:93
llvm::TargetLoweringBase::shouldExpandBuildVectorWithShuffles
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
Definition: TargetLowering.h:487
llvm::PPCTargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
Definition: PPCISelLowering.cpp:1607
isVMerge
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
Definition: PPCISelLowering.cpp:1931
llvm::SDNodeFlags::hasNoSignedZeros
bool hasNoSignedZeros() const
Definition: SelectionDAGNodes.h:429
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
setAlignFlagsForFI
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
Definition: PPCISelLowering.cpp:17566
buildCallOperands
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:5481
convertFPToInt
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
Definition: PPCISelLowering.cpp:8044
PPCPredicates.h
llvm::PPCISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
Definition: PPCISelLowering.h:270
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:95
llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition: SelectionDAGNodes.h:960
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
llvm::PPCTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
Definition: PPCISelLowering.cpp:16648
Value.h
llvm::SelectionDAG::getStackArgumentTokenFactor
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
Definition: SelectionDAG.cpp:6564
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:523
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
MCExpr.h
llvm::TargetLowering::getSqrtInputTest
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
Definition: TargetLowering.cpp:6638
llvm::PPCTargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
Definition: PPCISelLowering.cpp:16721
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1053
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3386
llvm::PPCTargetLowering::shouldInlineQuadwordAtomics
bool shouldInlineQuadwordAtomics() const
Definition: PPCISelLowering.cpp:18099
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:919
llvm::PPCTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: PPCISelLowering.cpp:15100
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7841
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:852
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:57
llvm::codegen::getCodeModel
CodeModel::Model getCodeModel()
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1019
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2463
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:695
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:219
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:510
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:453
TargetRegisterInfo.h
llvm::PPCFunctionInfo::getReturnAddrSaveIndex
int getReturnAddrSaveIndex() const
Definition: PPCMachineFunctionInfo.h:164
llvm::PPCISD::TLS_LOCAL_EXEC_MAT_ADDR
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
Definition: PPCISelLowering.h:473
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Debug.h
llvm::PPCFunctionInfo::setMinReservedArea
void setMinReservedArea(unsigned size)
Definition: PPCMachineFunctionInfo.h:181
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:139
llvm::SystemZISD::TBEGIN
@ TBEGIN
Definition: SystemZISelLowering.h:154
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1159
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:480
llvm::PPCISD::ADDI_TLSGD_L_ADDR
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
Definition: PPCISelLowering.h:364
isConsecutiveLS
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
Definition: PPCISelLowering.cpp:13194
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:280
isValidPCRelNode
static bool isValidPCRelNode(SDValue N)
Definition: PPCISelLowering.cpp:2914
PPCTargetMachine.h
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1348
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:146
llvm::PPCISD::STRICT_FCTIWZ
@ STRICT_FCTIWZ
Definition: PPCISelLowering.h:492
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:606
llvm::PPCSubtarget::hasFRSQRTE
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:269
llvm::SrcOp
Definition: MachineIRBuilder.h:126
llvm::PPCSubtarget::pairedVectorMemops
bool pairedVectorMemops() const
Definition: PPCSubtarget.h:293
llvm::PPCISD::COND_BRANCH
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
Definition: PPCISelLowering.h:292
llvm::PPCSubtarget::use64BitRegs
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:255
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:5086
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:354
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::PPCTargetLowering::emitProbedAlloca
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
Definition: PPCISelLowering.cpp:12036
llvm::Module::getNamedValue
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
Definition: Module.cpp:110
llvm::PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
Definition: PPCISelLowering.h:468
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1081
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:199
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7588
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::PPCISD::FRSQRTE
@ FRSQRTE
Definition: PPCISelLowering.h:90
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
getNormalLoadInput
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
Definition: PPCISelLowering.cpp:9073
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::PPCISD::VECSHL
@ VECSHL
VECSHL - The PPC vector shift left instruction.
Definition: PPCISelLowering.h:121
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:264
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::PPCSubtarget::hasP10Vector
bool hasP10Vector() const
Definition: PPCSubtarget.h:287
llvm::PPCISD::FCFIDS
@ FCFIDS
Definition: PPCISelLowering.h:66
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1193
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:46
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::PPCSubtarget::isELFv2ABI
bool isELFv2ABI() const
Definition: PPCSubtarget.cpp:254
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1839
TargetLoweringObjectFileImpl.h
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:393
llvm::LLT
Definition: LowLevelTypeImpl.h:39